fake-orcid-analysis/notebooks/01-Exploration.ipynb

25292 lines
943 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Exploratory analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO:\n",
"- Understanding the reason for fake profiles can bring insight on how to catch them (could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n",
"- Study different cases (e.g. author publishing with empty orcid, author publishing but not on OpenAIRE, etc.)\n",
"- Temporal dimension; is it of any use?\n",
"- Can we access private info thanks to the OpenAIRE-ORCID agreement? No.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import glob\n",
"import ast\n",
"import tldextract\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import antispam\n",
"import profanity_check\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"init_notebook_mode(connected=True)\n",
"TOP_N = 0\n",
"TOP_RANGE = [0, 0]\n",
"\n",
"def set_top_n(n):\n",
" global TOP_N, TOP_RANGE\n",
" TOP_N = n\n",
" TOP_RANGE = [-.5, n - 1 + .5]\n",
" \n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable solid ORCID iDs for explorative purposes:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"AM = '0000-0002-5193-7851'\n",
"PP = '0000-0002-8588-4196'\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable anomalies:"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"JOURNAL = '0000-0003-1815-5732'\n",
"NOINFO = '0000-0001-5009-2052'\n",
"VALID_NO_OA = '0000-0002-5154-6404' # True profile, but not in OpenAIRE\n",
"WORK_MISUSE = '0000-0001-7870-1120'\n",
"# todo: find group-shared ORCiD, if possible"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable fake ORCID iDs:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"FAKE_HEAP = {\n",
" 'scaffold': '0000-0001-5004-7761',\n",
" 'whatsapp': '0000-0001-6997-9470',\n",
" 'penis': '0000-0002-3399-7287',\n",
" 'bitcoin': '0000-0002-7518-6845',\n",
" 'fitness': '0000-0002-1234-835X', # URL record + employment\n",
" 'cannabis': '0000-0002-9025-8632', # URL > 70 + works (now REMOVED)\n",
" 'plumber': '0000-0002-1700-8311', # URL > 10 + works\n",
" 'furniture': '0000-0001-7478-4539',\n",
" 'cleaners': '0000-0002-7392-3792'\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the dataset"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-6097-3953</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-03-02 09:29:16.528000+00:00</td>\n",
" <td>2018-03-02 09:43:07.551000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-6112-5550</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[v.i. yurtaev; v. yurtaev]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[professor, peoples friendship university of ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-04-03 07:50:23.358000+00:00</td>\n",
" <td>2020-03-18 09:42:44.753000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-6152-2695</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-12-11 15:31:56.388000+00:00</td>\n",
" <td>2020-01-28 15:34:17.309000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-6220-5683</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, new york university abu ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-08-18 12:36:45.307000+00:00</td>\n",
" <td>2020-09-23 13:37:54.180000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-03-10 13:22:01.966000+00:00</td>\n",
" <td>2016-06-14 22:17:54.470000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email given_names \\\n",
"0 0000-0001-6097-3953 False False <NA> \n",
"1 0000-0001-6112-5550 True True <NA> \n",
"2 0000-0001-6152-2695 True True <NA> \n",
"3 0000-0001-6220-5683 True True <NA> \n",
"4 0000-0001-7071-8294 True True <NA> \n",
"\n",
" family_name biography other_names primary_email keywords \\\n",
"0 <NA> <NA> NaN <NA> NaN \n",
"1 <NA> <NA> [v.i. yurtaev; v. yurtaev] <NA> NaN \n",
"2 <NA> <NA> NaN <NA> NaN \n",
"3 <NA> <NA> NaN <NA> NaN \n",
"4 <NA> <NA> NaN <NA> NaN \n",
"\n",
" external_ids education employment \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN [[professor, peoples friendship university of ... \n",
"2 NaN NaN NaN \n",
"3 NaN NaN [[research scientist, new york university abu ... \n",
"4 NaN NaN [[researcher (academic), universidad de zarago... \n",
"\n",
" n_works works_source activation_date \\\n",
"0 0 NaN 2018-03-02 09:29:16.528000+00:00 \n",
"1 0 NaN 2018-04-03 07:50:23.358000+00:00 \n",
"2 0 NaN 2019-12-11 15:31:56.388000+00:00 \n",
"3 0 NaN 2015-08-18 12:36:45.307000+00:00 \n",
"4 0 NaN 2014-03-10 13:22:01.966000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"0 2018-03-02 09:43:07.551000+00:00 0 0 0 0 \n",
"1 2020-03-18 09:42:44.753000+00:00 0 0 0 0 \n",
"2 2020-01-28 15:34:17.309000+00:00 0 0 0 0 \n",
"3 2020-09-23 13:37:54.180000+00:00 0 0 0 0 \n",
"4 2016-06-14 22:17:54.470000+00:00 0 0 0 0 \n",
"\n",
" label primary_email_domain other_email_domains url_domains n_emails \\\n",
"0 False NaN NaN NaN <NA> \n",
"1 False NaN NaN NaN <NA> \n",
"2 False NaN NaN NaN <NA> \n",
"3 False NaN NaN NaN <NA> \n",
"4 False NaN NaN NaN <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"0 <NA> <NA> <NA> <NA> <NA> \n",
"1 <NA> <NA> <NA> <NA> 1 \n",
"2 <NA> <NA> <NA> <NA> <NA> \n",
"3 <NA> <NA> <NA> <NA> 1 \n",
"4 <NA> <NA> <NA> <NA> 2 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"parts = glob.glob('../data/processed/dataset.pkl.*')\n",
"\n",
"df = pd.concat((pd.read_pickle(part) for part in sorted(parts)))\n",
"df.head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable profiles inspection"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3073261</th>\n",
" <td>0000-0002-5193-7851</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>andrea</td>\n",
" <td>mannocci</td>\n",
" <td>data scientist &amp; researcher; scholarly knowled...</td>\n",
" <td>NaN</td>\n",
" <td>andrea.mannocci@isti.cnr.it</td>\n",
" <td>[science of science, open science, research in...</td>\n",
" <td>[[scopus author id, 55233589900]]</td>\n",
" <td>[[information engineering, ph.d., università d...</td>\n",
" <td>[[research associate, istituto di scienza e te...</td>\n",
" <td>37</td>\n",
" <td>[scopus - elsevier, crossref metadata search, ...</td>\n",
" <td>2017-09-12 14:28:33.467000+00:00</td>\n",
" <td>2021-03-17 15:40:07.776000+00:00</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>True</td>\n",
" <td>isti.cnr.it</td>\n",
" <td>NaN</td>\n",
" <td>[github.io, twitter.com, linkedin.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"3073261 0000-0002-5193-7851 True True \n",
"\n",
" given_names family_name \\\n",
"3073261 andrea mannocci \n",
"\n",
" biography other_names \\\n",
"3073261 data scientist & researcher; scholarly knowled... NaN \n",
"\n",
" primary_email \\\n",
"3073261 andrea.mannocci@isti.cnr.it \n",
"\n",
" keywords \\\n",
"3073261 [science of science, open science, research in... \n",
"\n",
" external_ids \\\n",
"3073261 [[scopus author id, 55233589900]] \n",
"\n",
" education \\\n",
"3073261 [[information engineering, ph.d., università d... \n",
"\n",
" employment n_works \\\n",
"3073261 [[research associate, istituto di scienza e te... 37 \n",
"\n",
" works_source \\\n",
"3073261 [scopus - elsevier, crossref metadata search, ... \n",
"\n",
" activation_date last_update_date \\\n",
"3073261 2017-09-12 14:28:33.467000+00:00 2021-03-17 15:40:07.776000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"3073261 34 0 0 60 True isti.cnr.it \n",
"\n",
" other_email_domains url_domains n_emails \\\n",
"3073261 NaN [github.io, twitter.com, linkedin.com] <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"3073261 3 1 5 4 5 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['orcid'] == AM]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9887272</th>\n",
" <td>0000-0001-6997-9470</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>other</td>\n",
" <td>whatsapp</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[whatsapp gb apk, whatsapp gb, whatsapp gb bai...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-07 10:37:12.237000+00:00</td>\n",
" <td>2020-10-08 02:32:03.935000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[otherwhatsapp.com, im-creator.com, facebook.c...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>27</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"9887272 0000-0001-6997-9470 True True \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"9887272 other whatsapp <NA> NaN <NA> \n",
"\n",
" keywords external_ids \\\n",
"9887272 [whatsapp gb apk, whatsapp gb, whatsapp gb bai... NaN \n",
"\n",
" education employment n_works works_source \\\n",
"9887272 NaN NaN 0 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"9887272 2020-10-07 10:37:12.237000+00:00 2020-10-08 02:32:03.935000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"9887272 0 0 0 0 False NaN \n",
"\n",
" other_email_domains \\\n",
"9887272 NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"9887272 [otherwhatsapp.com, im-creator.com, facebook.c... <NA> 27 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"9887272 <NA> 4 <NA> <NA> "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['orcid'] == FAKE_HEAP['whatsapp']]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"orcid 10989649\n",
"verified_email 10989649\n",
"verified_primary_email 10989649\n",
"given_names 10959039\n",
"family_name 10671715\n",
"biography 354015\n",
"other_names 554684\n",
"primary_email 124722\n",
"keywords 649637\n",
"external_ids 1308598\n",
"education 2441645\n",
"employment 2680488\n",
"n_works 10989649\n",
"works_source 2740939\n",
"activation_date 10989649\n",
"last_update_date 10989649\n",
"n_doi 10989649\n",
"n_arxiv 10989649\n",
"n_pmc 10989649\n",
"n_other_pids 10989649\n",
"label 10989649\n",
"primary_email_domain 124722\n",
"other_email_domains 48615\n",
"url_domains 715067\n",
"n_emails 48615\n",
"n_urls 715067\n",
"n_ids 1308598\n",
"n_keywords 649637\n",
"n_education 2441645\n",
"n_employment 2680488\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.count()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 10989649\n",
"unique 10989649\n",
"top 0000-0002-6591-4060\n",
"freq 1\n",
"Name: orcid, dtype: object"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['orcid'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Primary email"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 124722\n",
"unique 124718\n",
"top andycheng2026@163.com\n",
"freq 2\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dupe emails"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1681787 opercin@erbakan.edu.tr\n",
"5590332 patrick.davey@monash.edu\n",
"9316843 maykin@owasp.org\n",
"10375852 andycheng2026@163.com\n",
"Name: primary_email, dtype: string"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email'].dropna().loc[df['primary_email'].duplicated()]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7543981</th>\n",
" <td>0000-0002-0836-2271</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-15 04:43:55.709000+00:00</td>\n",
" <td>2020-09-15 05:17:28.509000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>owasp.org</td>\n",
" <td>[dga.or.th]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9316843</th>\n",
" <td>0000-0001-9855-1676</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-23 17:51:51.925000+00:00</td>\n",
" <td>2021-01-01 15:00:52.053000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>owasp.org</td>\n",
" <td>[dga.or.th, ieee.org]</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"7543981 0000-0002-0836-2271 True True \n",
"9316843 0000-0001-9855-1676 True True \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"7543981 maykin warasart <NA> NaN maykin@owasp.org \n",
"9316843 maykin warasart <NA> NaN maykin@owasp.org \n",
"\n",
" keywords external_ids education employment n_works works_source \\\n",
"7543981 NaN NaN NaN NaN 0 NaN \n",
"9316843 NaN NaN NaN NaN 0 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"7543981 2020-09-15 04:43:55.709000+00:00 2020-09-15 05:17:28.509000+00:00 \n",
"9316843 2020-10-23 17:51:51.925000+00:00 2021-01-01 15:00:52.053000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"7543981 0 0 0 0 False owasp.org \n",
"9316843 0 0 0 0 False owasp.org \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids \\\n",
"7543981 [dga.or.th] NaN 1 <NA> <NA> \n",
"9316843 [dga.or.th, ieee.org] NaN 2 <NA> <NA> \n",
"\n",
" n_keywords n_education n_employment \n",
"7543981 <NA> <NA> <NA> \n",
"9316843 <NA> <NA> <NA> "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'maykin@owasp.org']"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>347852</th>\n",
" <td>0000-0002-2232-9638</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-12 13:47:55.549000+00:00</td>\n",
" <td>2020-01-27 07:38:24.269000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1681787</th>\n",
" <td>0000-0003-0033-0918</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, necmettin erbakan university, konya, , tr,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-10-13 05:47:12.014000+00:00</td>\n",
" <td>2020-12-25 13:52:03.976000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"347852 0000-0002-2232-9638 True True \n",
"1681787 0000-0003-0033-0918 True True \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"347852 osman perçin <NA> NaN opercin@erbakan.edu.tr \n",
"1681787 osman perçin <NA> NaN opercin@erbakan.edu.tr \n",
"\n",
" keywords external_ids education \\\n",
"347852 NaN NaN NaN \n",
"1681787 NaN NaN NaN \n",
"\n",
" employment n_works \\\n",
"347852 NaN 0 \n",
"1681787 [[, necmettin erbakan university, konya, , tr,... 0 \n",
"\n",
" works_source activation_date \\\n",
"347852 NaN 2015-01-12 13:47:55.549000+00:00 \n",
"1681787 NaN 2015-10-13 05:47:12.014000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"347852 2020-01-27 07:38:24.269000+00:00 0 0 0 0 \n",
"1681787 2020-12-25 13:52:03.976000+00:00 0 0 0 0 \n",
"\n",
" label primary_email_domain other_email_domains url_domains n_emails \\\n",
"347852 False erbakan.edu.tr NaN NaN <NA> \n",
"1681787 False erbakan.edu.tr NaN NaN <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"347852 <NA> <NA> <NA> <NA> <NA> \n",
"1681787 <NA> <NA> <NA> <NA> 1 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'opercin@erbakan.edu.tr']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>954085</th>\n",
" <td>0000-0002-9158-1757</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>[inorganic chemistry, radiopharmaceuticals, ra...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[phd student, monash university, melbourne, ,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-05-09 23:01:02.170000+00:00</td>\n",
" <td>2019-08-20 03:00:17.844000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5590332</th>\n",
" <td>0000-0002-8774-0030</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[phd student, monash university, melbourne, v...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2018-09-11 10:47:10.997000+00:00</td>\n",
" <td>2021-02-09 06:21:44.138000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"954085 0000-0002-9158-1757 True True \n",
"5590332 0000-0002-8774-0030 True True \n",
"\n",
" given_names family_name biography other_names \\\n",
"954085 patrick davey <NA> NaN \n",
"5590332 patrick davey <NA> NaN \n",
"\n",
" primary_email \\\n",
"954085 patrick.davey@monash.edu \n",
"5590332 patrick.davey@monash.edu \n",
"\n",
" keywords external_ids \\\n",
"954085 [inorganic chemistry, radiopharmaceuticals, ra... NaN \n",
"5590332 NaN NaN \n",
"\n",
" education employment n_works \\\n",
"954085 NaN [[phd student, monash university, melbourne, ,... 0 \n",
"5590332 NaN [[phd student, monash university, melbourne, v... 1 \n",
"\n",
" works_source activation_date \\\n",
"954085 NaN 2019-05-09 23:01:02.170000+00:00 \n",
"5590332 [crossref] 2018-09-11 10:47:10.997000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"954085 2019-08-20 03:00:17.844000+00:00 0 0 0 0 \n",
"5590332 2021-02-09 06:21:44.138000+00:00 1 0 0 0 \n",
"\n",
" label primary_email_domain other_email_domains url_domains n_emails \\\n",
"954085 False monash.edu NaN NaN <NA> \n",
"5590332 True monash.edu NaN NaN <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"954085 <NA> <NA> 4 <NA> 1 \n",
"5590332 <NA> <NA> <NA> <NA> 1 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'patrick.davey@monash.edu']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 124722\n",
"unique 17160\n",
"top gmail.com\n",
"freq 26750\n",
"Name: primary_email_domain, dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email_domain'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" <tr>\n",
" <th>primary_email_domain</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>gmail.com</th>\n",
" <td>26750</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hotmail.com</th>\n",
" <td>3801</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yahoo.com</th>\n",
" <td>2625</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163.com</th>\n",
" <td>2132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yuhs.ac</th>\n",
" <td>1134</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imf.csic.es</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imf.org</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imfd.tu-freiberg.de</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imft.fr</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>zzuli.edu.cn</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>17160 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid\n",
"primary_email_domain \n",
"gmail.com 26750\n",
"hotmail.com 3801\n",
"yahoo.com 2625\n",
"163.com 2132\n",
"yuhs.ac 1134\n",
"... ...\n",
"imf.csic.es 1\n",
"imf.org 1\n",
"imfd.tu-freiberg.de 1\n",
"imft.fr 1\n",
"zzuli.edu.cn 1\n",
"\n",
"[17160 rows x 1 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_primary_emails = df[['primary_email_domain', 'orcid']]\\\n",
" .groupby('primary_email_domain')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)\n",
"top_primary_emails"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"163.com",
"yuhs.ac",
"qq.com",
"outlook.com",
"126.com",
"bu.edu",
"usgs.gov",
"mail.ru",
"usp.br",
"yahoo.com.br",
"ua.pt",
"umich.edu",
"ust.hk",
"foxmail.com",
"uomustansiriyah.edu.iq",
"yandex.ru",
"uq.edu.au",
"ukr.net",
"unesp.br",
"ucl.ac.uk",
"ieee.org",
"naver.com",
"stcatz.ox.ac.uk",
"st-annes.ox.ac.uk",
"yahoo.fr",
"ucm.es",
"live.com"
],
"y": [
26750,
3801,
2625,
2132,
1134,
1059,
948,
766,
629,
586,
579,
464,
459,
302,
290,
277,
260,
248,
244,
235,
226,
218,
210,
205,
188,
184,
184,
174,
174,
165
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-30 email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"d80a3713-3581-4754-afbe-d0f4f26f3693\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"d80a3713-3581-4754-afbe-d0f4f26f3693\")) { Plotly.newPlot( \"d80a3713-3581-4754-afbe-d0f4f26f3693\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"163.com\", \"yuhs.ac\", \"qq.com\", \"outlook.com\", \"126.com\", \"bu.edu\", \"usgs.gov\", \"mail.ru\", \"usp.br\", \"yahoo.com.br\", \"ua.pt\", \"umich.edu\", \"ust.hk\", \"foxmail.com\", \"uomustansiriyah.edu.iq\", \"yandex.ru\", \"uq.edu.au\", \"ukr.net\", \"unesp.br\", \"ucl.ac.uk\", \"ieee.org\", \"naver.com\", \"stcatz.ox.ac.uk\", \"st-annes.ox.ac.uk\", \"yahoo.fr\", \"ucm.es\", \"live.com\"], \"y\": [26750, 3801, 2625, 2132, 1134, 1059, 948, 766, 629, 586, 579, 464, 459, 302, 290, 277, 260, 248, 244, 235, 226, 218, 210, 205, 188, 184, 184, 174, 174, 165]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, 
\"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, 
\"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, 
\"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top-30 email domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('d80a3713-3581-4754-afbe-d0f4f26f3693');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=top_primary_emails[:TOP_N].index,\n",
" y=top_primary_emails[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top-%s email domains' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other emails"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>251</th>\n",
" <td>0000-0002-5916-446X</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>antonio gilvan</td>\n",
" <td>teixeira júnior</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[teixeira, antônio gilvan, júnior, antonio gil...</td>\n",
" <td>gilvan.junior@aluno.ufca.edu.br</td>\n",
" <td>[ethicis; medicine; infectology; neurology; ne...</td>\n",
" <td>[[scopus author id, 56647743200], [scopus auth...</td>\n",
" <td>[[faculty of health and life sciences, , unive...</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>[antonio gilvan teixeira júnior, scopus - else...</td>\n",
" <td>2016-05-18 11:26:36.642000+00:00</td>\n",
" <td>2016-09-20 18:25:05.728000+00:00</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>False</td>\n",
" <td>aluno.ufca.edu.br</td>\n",
" <td>[liverpool.ac.uk]</td>\n",
" <td>[researchgate.net, academia.edu, cnpq.br]</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>316</th>\n",
" <td>0000-0002-8742-947X</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>aaron</td>\n",
" <td>tan shing loong</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>aaron.tanshingloong@wadh.ox.ac.uk</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[ruskin school of art; wadham college, , univ...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-10-05 23:10:08.771000+00:00</td>\n",
" <td>2016-06-14 19:55:50.313000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>wadh.ox.ac.uk</td>\n",
" <td>[rsa.ox.ac.uk]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>433</th>\n",
" <td>0000-0001-9097-2281</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>abhishek</td>\n",
" <td>solanki</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[senior engineer, robert bosch (india), benga...</td>\n",
" <td>1</td>\n",
" <td>[abhishek solanki]</td>\n",
" <td>2019-04-22 04:43:06.232000+00:00</td>\n",
" <td>2020-07-02 14:18:28.305000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[in.bosch.com]</td>\n",
" <td>[github.com, linkedin.com]</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>0000-0002-8614-3007</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>adam</td>\n",
" <td>arra</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-11-15 06:33:45.625000+00:00</td>\n",
" <td>2017-11-15 06:44:02.998000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[hct.ac.ae]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>869</th>\n",
" <td>0000-0001-9884-5498</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>alberto</td>\n",
" <td>ronzani</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>alberto@aronza.com</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, vtt technical research c...</td>\n",
" <td>19</td>\n",
" <td>[crossref metadata search, alberto ronzani, cr...</td>\n",
" <td>2014-04-16 13:21:54.287000+00:00</td>\n",
" <td>2020-09-28 15:10:37.439000+00:00</td>\n",
" <td>18</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>True</td>\n",
" <td>aronza.com</td>\n",
" <td>[vtt.fi]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"251 0000-0002-5916-446X True True \n",
"316 0000-0002-8742-947X True True \n",
"433 0000-0001-9097-2281 True True \n",
"497 0000-0002-8614-3007 True True \n",
"869 0000-0001-9884-5498 True True \n",
"\n",
" given_names family_name biography \\\n",
"251 antonio gilvan teixeira júnior <NA> \n",
"316 aaron tan shing loong <NA> \n",
"433 abhishek solanki <NA> \n",
"497 adam arra <NA> \n",
"869 alberto ronzani <NA> \n",
"\n",
" other_names \\\n",
"251 [teixeira, antônio gilvan, júnior, antonio gil... \n",
"316 NaN \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" primary_email \\\n",
"251 gilvan.junior@aluno.ufca.edu.br \n",
"316 aaron.tanshingloong@wadh.ox.ac.uk \n",
"433 <NA> \n",
"497 <NA> \n",
"869 alberto@aronza.com \n",
"\n",
" keywords \\\n",
"251 [ethicis; medicine; infectology; neurology; ne... \n",
"316 NaN \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" external_ids \\\n",
"251 [[scopus author id, 56647743200], [scopus auth... \n",
"316 NaN \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" education \\\n",
"251 [[faculty of health and life sciences, , unive... \n",
"316 [[ruskin school of art; wadham college, , univ... \n",
"433 NaN \n",
"497 NaN \n",
"869 NaN \n",
"\n",
" employment n_works \\\n",
"251 NaN 14 \n",
"316 NaN 0 \n",
"433 [[senior engineer, robert bosch (india), benga... 1 \n",
"497 NaN 0 \n",
"869 [[research scientist, vtt technical research c... 19 \n",
"\n",
" works_source \\\n",
"251 [antonio gilvan teixeira júnior, scopus - else... \n",
"316 NaN \n",
"433 [abhishek solanki] \n",
"497 NaN \n",
"869 [crossref metadata search, alberto ronzani, cr... \n",
"\n",
" activation_date last_update_date n_doi \\\n",
"251 2016-05-18 11:26:36.642000+00:00 2016-09-20 18:25:05.728000+00:00 13 \n",
"316 2015-10-05 23:10:08.771000+00:00 2016-06-14 19:55:50.313000+00:00 0 \n",
"433 2019-04-22 04:43:06.232000+00:00 2020-07-02 14:18:28.305000+00:00 0 \n",
"497 2017-11-15 06:33:45.625000+00:00 2017-11-15 06:44:02.998000+00:00 0 \n",
"869 2014-04-16 13:21:54.287000+00:00 2020-09-28 15:10:37.439000+00:00 18 \n",
"\n",
" n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"251 0 0 8 False aluno.ufca.edu.br \n",
"316 0 0 0 False wadh.ox.ac.uk \n",
"433 0 0 0 False NaN \n",
"497 0 0 0 False NaN \n",
"869 0 0 3 True aronza.com \n",
"\n",
" other_email_domains url_domains n_emails \\\n",
"251 [liverpool.ac.uk] [researchgate.net, academia.edu, cnpq.br] 1 \n",
"316 [rsa.ox.ac.uk] NaN 1 \n",
"433 [in.bosch.com] [github.com, linkedin.com] 1 \n",
"497 [hct.ac.ae] NaN 1 \n",
"869 [vtt.fi] NaN 1 \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"251 3 4 1 1 <NA> \n",
"316 <NA> <NA> <NA> 1 <NA> \n",
"433 2 <NA> <NA> <NA> 2 \n",
"497 <NA> <NA> <NA> <NA> <NA> \n",
"869 <NA> <NA> <NA> <NA> 1 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first profiles that have a non-null other_email_domains value\n",
"df.loc[df['other_email_domains'].notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the iD and its email count, ordered from most to fewest emails\n",
"emails_by_orcid = (\n",
"    df.loc[:, ['orcid', 'n_emails']]\n",
"    .sort_values(by='n_emails', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0003-4171-3835",
"0000-0001-6239-2968",
"0000-0003-2151-4089",
"0000-0003-2290-2817",
"0000-0001-9084-3156",
"0000-0001-6349-1044",
"0000-0002-2085-1908",
"0000-0003-4147-212X",
"0000-0002-9599-6909",
"0000-0001-9311-0687",
"0000-0003-1502-3910",
"0000-0002-9821-8424",
"0000-0003-4327-6827",
"0000-0002-1929-6054",
"0000-0002-8390-8238",
"0000-0002-1615-8633",
"0000-0003-0671-1543",
"0000-0003-4499-7300",
"0000-0002-5341-6531",
"0000-0002-8565-194X",
"0000-0002-0776-9547",
"0000-0001-8420-9204",
"0000-0002-7396-1561",
"0000-0002-3165-132X",
"0000-0002-2567-3741",
"0000-0003-2657-8225",
"0000-0003-4685-5621",
"0000-0001-5548-8259",
"0000-0003-0391-3430",
"0000-0003-2526-0928"
],
"y": [
12,
9,
7,
7,
6,
6,
6,
6,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
4
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 ORCID iDs by email"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"635feda2-aef8-40d4-b05a-c99ecbca4004\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"635feda2-aef8-40d4-b05a-c99ecbca4004\")) { Plotly.newPlot( \"635feda2-aef8-40d4-b05a-c99ecbca4004\", [{\"type\": \"bar\", \"x\": [\"0000-0003-4171-3835\", \"0000-0001-6239-2968\", \"0000-0003-2151-4089\", \"0000-0003-2290-2817\", \"0000-0001-9084-3156\", \"0000-0001-6349-1044\", \"0000-0002-2085-1908\", \"0000-0003-4147-212X\", \"0000-0002-9599-6909\", \"0000-0001-9311-0687\", \"0000-0003-1502-3910\", \"0000-0002-9821-8424\", \"0000-0003-4327-6827\", \"0000-0002-1929-6054\", \"0000-0002-8390-8238\", \"0000-0002-1615-8633\", \"0000-0003-0671-1543\", \"0000-0003-4499-7300\", \"0000-0002-5341-6531\", \"0000-0002-8565-194X\", \"0000-0002-0776-9547\", \"0000-0001-8420-9204\", \"0000-0002-7396-1561\", \"0000-0002-3165-132X\", \"0000-0002-2567-3741\", \"0000-0003-2657-8225\", \"0000-0003-4685-5621\", \"0000-0001-5548-8259\", \"0000-0003-0391-3430\", \"0000-0003-2526-0928\"], \"y\": [12, 9, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, 
\"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], 
\"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], 
[0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": 
\"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 ORCID iDs by email\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('635feda2-aef8-40d4-b05a-c99ecbca4004');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the first TOP_N rows of emails_by_orcid (orcid on x, n_emails on y).\n",
"# NOTE(review): set_top_n is defined elsewhere in the notebook; it presumably\n",
"# refreshes the TOP_N / TOP_RANGE globals read below -- confirm against its definition.\n",
"set_top_n(30)\n",
"\n",
"# Slice once and reuse the same rows for both axes\n",
"top_email_profiles = emails_by_orcid.head(TOP_N)\n",
"\n",
"data = [\n",
"    go.Bar(x=top_email_profiles['orcid'], y=top_email_profiles['n_emails'])\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Top %s ORCID iDs by email' % TOP_N,\n",
"    xaxis={\n",
"        'tickangle': 45,\n",
"        'tickfont': {'size': 12},\n",
"        'range': TOP_RANGE,\n",
"    },\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# One row per (profile, other email domain) entry, then tally the entries per\n",
"# domain and order the domains from most to least frequent.\n",
"top_other_emails = (\n",
"    df.loc[:, ['orcid', 'other_email_domains']]\n",
"    .explode(column='other_email_domains')\n",
"    .reset_index(drop=True)\n",
"    .groupby(by='other_email_domains')\n",
"    .count()\n",
"    .sort_values(by='orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"qq.com",
"163.com",
"outlook.com",
"126.com",
"usp.br",
"ieee.org",
"yahoo.com.br",
"mail.ru",
"unesp.br",
"sbs.ox.ac.uk",
"yuhs.ac",
"naver.com",
"icloud.com",
"foxmail.com",
"uq.edu.au",
"ua.pt",
"cam.ac.uk",
"imperial.ac.uk",
"ukr.net",
"law.ox.ac.uk",
"mit.edu",
"monash.edu",
"stanford.edu",
"ucl.ac.uk",
"education.ox.ac.uk",
"ucm.es",
"conted.ox.ac.uk"
],
"y": [
11198,
1550,
1303,
785,
780,
433,
262,
236,
226,
151,
148,
141,
136,
134,
132,
119,
98,
96,
90,
84,
79,
75,
75,
74,
70,
70,
69,
67,
66,
65
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 other email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"c3125310-694d-4dbf-91fb-00503a84a3d0\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"c3125310-694d-4dbf-91fb-00503a84a3d0\")) { Plotly.newPlot( \"c3125310-694d-4dbf-91fb-00503a84a3d0\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"qq.com\", \"163.com\", \"outlook.com\", \"126.com\", \"usp.br\", \"ieee.org\", \"yahoo.com.br\", \"mail.ru\", \"unesp.br\", \"sbs.ox.ac.uk\", \"yuhs.ac\", \"naver.com\", \"icloud.com\", \"foxmail.com\", \"uq.edu.au\", \"ua.pt\", \"cam.ac.uk\", \"imperial.ac.uk\", \"ukr.net\", \"law.ox.ac.uk\", \"mit.edu\", \"monash.edu\", \"stanford.edu\", \"ucl.ac.uk\", \"education.ox.ac.uk\", \"ucm.es\", \"conted.ox.ac.uk\"], \"y\": [11198, 1550, 1303, 785, 780, 433, 262, 236, 226, 151, 148, 141, 136, 134, 132, 119, 98, 96, 90, 84, 79, 75, 75, 74, 70, 70, 69, 67, 66, 65]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, 
\"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": 
\"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", 
\"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 other email domains\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('c3125310-694d-4dbf-91fb-00503a84a3d0');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=top_other_emails[:TOP_N].index,\n",
" y=top_other_emails[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s other email domains' % TOP_N, \n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This somehow makes sense, legitimate users could put the gmail account as primary for login purposes and have institutional addresses as other email addresses. It makes also the life easier upon relocation."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Email speculation"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>433</th>\n",
" <td>0000-0001-9097-2281</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>abhishek</td>\n",
" <td>solanki</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[senior engineer, robert bosch (india), benga...</td>\n",
" <td>1</td>\n",
" <td>[abhishek solanki]</td>\n",
" <td>2019-04-22 04:43:06.232000+00:00</td>\n",
" <td>2020-07-02 14:18:28.305000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[in.bosch.com]</td>\n",
" <td>[github.com, linkedin.com]</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>0000-0002-8614-3007</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>adam</td>\n",
" <td>arra</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-11-15 06:33:45.625000+00:00</td>\n",
" <td>2017-11-15 06:44:02.998000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[hct.ac.ae]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>898</th>\n",
" <td>0000-0003-3728-6439</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>alejandra</td>\n",
" <td>echeverry velásquez</td>\n",
" <td>alejandra echeverry is an industrial electrici...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[control, science, technology, innovation, ren...</td>\n",
" <td>NaN</td>\n",
" <td>[[, electrical engineer, institución universit...</td>\n",
" <td>[[professor, institución universitaria pascual...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2019-03-31 00:00:42.929000+00:00</td>\n",
" <td>2020-09-06 02:18:54.290000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>[pascualbravo.edu.co]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1719</th>\n",
" <td>0000-0001-8330-7443</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>andrea</td>\n",
" <td>tesoniero</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, d-9056-2015]]</td>\n",
" <td>[[department of geophysics, master of science ...</td>\n",
" <td>[[postdoctoral associate, yale university, new...</td>\n",
" <td>4</td>\n",
" <td>[andrea tesoniero]</td>\n",
" <td>2015-03-09 11:59:06.093000+00:00</td>\n",
" <td>2020-08-20 15:03:23.447000+00:00</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[yale.edu]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6829</th>\n",
" <td>0000-0001-9670-515X</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>esma esin</td>\n",
" <td>yildirim</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[natural chemistry, pharmacognosy, chemical en...</td>\n",
" <td>NaN</td>\n",
" <td>[[business management, master of science, ista...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-07-26 10:38:03.721000+00:00</td>\n",
" <td>2020-07-26 10:52:26.539000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[gmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10985816</th>\n",
" <td>0000-0003-1204-6009</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nathan</td>\n",
" <td>walk</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[department of physics, doctor of philosophy,...</td>\n",
" <td>[[, university of oxford, oxford, oxfordshire,...</td>\n",
" <td>10</td>\n",
" <td>[crossref metadata search]</td>\n",
" <td>2016-07-28 14:24:16.844000+00:00</td>\n",
" <td>2020-10-13 11:47:50.621000+00:00</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>[cs.ox.ac.uk]</td>\n",
" <td>[fu-berlin.de]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10986027</th>\n",
" <td>0000-0002-3472-7668</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>raf</td>\n",
" <td>vandevelde</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[chemical engineering technology, master, kat...</td>\n",
" <td>[[phd researcher, katholieke universiteit leuv...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-14 13:56:44.779000+00:00</td>\n",
" <td>2020-10-16 14:21:40.673000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[kuleuven.be]</td>\n",
" <td>[linkedin.com]</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10987501</th>\n",
" <td>0000-0002-9602-0529</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>carlos augusto</td>\n",
" <td>finelli</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2013-09-16 16:52:06.120000+00:00</td>\n",
" <td>2020-12-01 22:47:08.074000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>[cecot.com.br]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10987829</th>\n",
" <td>0000-0003-4402-5982</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>filipe</td>\n",
" <td>de almeida araújo</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[materials science, msc. materials science, m...</td>\n",
" <td>[[co-owner, aeft acessory, manaus, amazonas, b...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-03-02 20:11:01.699000+00:00</td>\n",
" <td>2020-12-04 13:53:39.404000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>[ime.eb.br]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10988444</th>\n",
" <td>0000-0002-1734-7241</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>manareldeen</td>\n",
" <td>ahmed</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[atomistic simulation, ai chips, graphene, dee...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[post-doctor, zhejiang university, hangzhou, ...</td>\n",
" <td>6</td>\n",
" <td>[manareldeen ahmed]</td>\n",
" <td>2017-02-17 13:18:36.540000+00:00</td>\n",
" <td>2020-12-04 02:04:36.668000+00:00</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>[hotmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>5</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19814 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"433 0000-0001-9097-2281 True True \n",
"497 0000-0002-8614-3007 True True \n",
"898 0000-0003-3728-6439 True True \n",
"1719 0000-0001-8330-7443 True True \n",
"6829 0000-0001-9670-515X True True \n",
"... ... ... ... \n",
"10985816 0000-0003-1204-6009 True True \n",
"10986027 0000-0002-3472-7668 True True \n",
"10987501 0000-0002-9602-0529 True True \n",
"10987829 0000-0003-4402-5982 True True \n",
"10988444 0000-0002-1734-7241 True True \n",
"\n",
" given_names family_name \\\n",
"433 abhishek solanki \n",
"497 adam arra \n",
"898 alejandra echeverry velásquez \n",
"1719 andrea tesoniero \n",
"6829 esma esin yildirim \n",
"... ... ... \n",
"10985816 nathan walk \n",
"10986027 raf vandevelde \n",
"10987501 carlos augusto finelli \n",
"10987829 filipe de almeida araújo \n",
"10988444 manareldeen ahmed \n",
"\n",
" biography other_names \\\n",
"433 <NA> NaN \n",
"497 <NA> NaN \n",
"898 alejandra echeverry is an industrial electrici... NaN \n",
"1719 <NA> NaN \n",
"6829 <NA> NaN \n",
"... ... ... \n",
"10985816 <NA> NaN \n",
"10986027 <NA> NaN \n",
"10987501 <NA> NaN \n",
"10987829 <NA> NaN \n",
"10988444 <NA> NaN \n",
"\n",
" primary_email keywords \\\n",
"433 <NA> NaN \n",
"497 <NA> NaN \n",
"898 <NA> [control, science, technology, innovation, ren... \n",
"1719 <NA> NaN \n",
"6829 <NA> [natural chemistry, pharmacognosy, chemical en... \n",
"... ... ... \n",
"10985816 <NA> NaN \n",
"10986027 <NA> NaN \n",
"10987501 <NA> NaN \n",
"10987829 <NA> NaN \n",
"10988444 <NA> [atomistic simulation, ai chips, graphene, dee... \n",
"\n",
" external_ids \\\n",
"433 NaN \n",
"497 NaN \n",
"898 NaN \n",
"1719 [[researcherid, d-9056-2015]] \n",
"6829 NaN \n",
"... ... \n",
"10985816 NaN \n",
"10986027 NaN \n",
"10987501 NaN \n",
"10987829 NaN \n",
"10988444 NaN \n",
"\n",
" education \\\n",
"433 NaN \n",
"497 NaN \n",
"898 [[, electrical engineer, institución universit... \n",
"1719 [[department of geophysics, master of science ... \n",
"6829 [[business management, master of science, ista... \n",
"... ... \n",
"10985816 [[department of physics, doctor of philosophy,... \n",
"10986027 [[chemical engineering technology, master, kat... \n",
"10987501 NaN \n",
"10987829 [[materials science, msc. materials science, m... \n",
"10988444 NaN \n",
"\n",
" employment n_works \\\n",
"433 [[senior engineer, robert bosch (india), benga... 1 \n",
"497 NaN 0 \n",
"898 [[professor, institución universitaria pascual... 1 \n",
"1719 [[postdoctoral associate, yale university, new... 4 \n",
"6829 NaN 0 \n",
"... ... ... \n",
"10985816 [[, university of oxford, oxford, oxfordshire,... 10 \n",
"10986027 [[phd researcher, katholieke universiteit leuv... 0 \n",
"10987501 NaN 1 \n",
"10987829 [[co-owner, aeft acessory, manaus, amazonas, b... 0 \n",
"10988444 [[post-doctor, zhejiang university, hangzhou, ... 6 \n",
"\n",
" works_source activation_date \\\n",
"433 [abhishek solanki] 2019-04-22 04:43:06.232000+00:00 \n",
"497 NaN 2017-11-15 06:33:45.625000+00:00 \n",
"898 [crossref] 2019-03-31 00:00:42.929000+00:00 \n",
"1719 [andrea tesoniero] 2015-03-09 11:59:06.093000+00:00 \n",
"6829 NaN 2020-07-26 10:38:03.721000+00:00 \n",
"... ... ... \n",
"10985816 [crossref metadata search] 2016-07-28 14:24:16.844000+00:00 \n",
"10986027 NaN 2020-10-14 13:56:44.779000+00:00 \n",
"10987501 [crossref] 2013-09-16 16:52:06.120000+00:00 \n",
"10987829 NaN 2020-03-02 20:11:01.699000+00:00 \n",
"10988444 [manareldeen ahmed] 2017-02-17 13:18:36.540000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc \\\n",
"433 2020-07-02 14:18:28.305000+00:00 0 0 0 \n",
"497 2017-11-15 06:44:02.998000+00:00 0 0 0 \n",
"898 2020-09-06 02:18:54.290000+00:00 1 0 0 \n",
"1719 2020-08-20 15:03:23.447000+00:00 4 0 0 \n",
"6829 2020-07-26 10:52:26.539000+00:00 0 0 0 \n",
"... ... ... ... ... \n",
"10985816 2020-10-13 11:47:50.621000+00:00 10 0 0 \n",
"10986027 2020-10-16 14:21:40.673000+00:00 0 0 0 \n",
"10987501 2020-12-01 22:47:08.074000+00:00 1 0 0 \n",
"10987829 2020-12-04 13:53:39.404000+00:00 0 0 0 \n",
"10988444 2020-12-04 02:04:36.668000+00:00 6 0 0 \n",
"\n",
" n_other_pids label primary_email_domain other_email_domains \\\n",
"433 0 False NaN [in.bosch.com] \n",
"497 0 False NaN [hct.ac.ae] \n",
"898 0 True NaN [pascualbravo.edu.co] \n",
"1719 2 False NaN [yale.edu] \n",
"6829 0 False NaN [gmail.com] \n",
"... ... ... ... ... \n",
"10985816 0 True NaN [cs.ox.ac.uk] \n",
"10986027 0 False NaN [kuleuven.be] \n",
"10987501 0 True NaN [cecot.com.br] \n",
"10987829 0 False NaN [ime.eb.br] \n",
"10988444 3 True NaN [hotmail.com] \n",
"\n",
" url_domains n_emails n_urls n_ids n_keywords \\\n",
"433 [github.com, linkedin.com] 1 2 <NA> <NA> \n",
"497 NaN 1 <NA> <NA> <NA> \n",
"898 NaN 1 <NA> <NA> 7 \n",
"1719 NaN 1 <NA> 1 <NA> \n",
"6829 NaN 1 <NA> <NA> 3 \n",
"... ... ... ... ... ... \n",
"10985816 [fu-berlin.de] 1 1 <NA> <NA> \n",
"10986027 [linkedin.com] 1 1 <NA> <NA> \n",
"10987501 NaN 1 <NA> <NA> <NA> \n",
"10987829 NaN 1 <NA> <NA> <NA> \n",
"10988444 NaN 1 <NA> <NA> 5 \n",
"\n",
" n_education n_employment \n",
"433 <NA> 2 \n",
"497 <NA> <NA> \n",
"898 1 1 \n",
"1719 4 2 \n",
"6829 3 <NA> \n",
"... ... ... \n",
"10985816 3 2 \n",
"10986027 2 1 \n",
"10987501 <NA> <NA> \n",
"10987829 2 1 \n",
"10988444 <NA> 1 \n",
"\n",
"[19814 rows x 30 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.primary_email.isna() & df.other_email_domains.notna()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 715067.000000\n",
"mean 1.434629\n",
"std 1.429160\n",
"min 1.000000\n",
"25% 1.000000\n",
"50% 1.000000\n",
"75% 1.000000\n",
"max 219.000000\n",
"Name: n_urls, dtype: float64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.n_urls.describe()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment]\n",
"Index: []"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.n_urls > df.n_urls.max()]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0000-0001-7402-0096</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, kth royal institute of technology, stockho...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-11 15:13:06.467000+00:00</td>\n",
" <td>2016-06-14 23:55:59.896000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[kth.se]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0000-0001-8377-3508</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[fontana, milena da silva]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[educação; informática; matemática.]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, instituto federal de educação, ciência e t...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-05-23 23:39:04.534000+00:00</td>\n",
" <td>2019-10-16 02:50:11.007000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0000-0002-2638-4108</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>investigador de la universidad de oviedo. depa...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[history of political thought, constitutional ...</td>\n",
" <td>[[scopus author id, 54394231000]]</td>\n",
" <td>[[public law, ph doctor, university of oviedo,...</td>\n",
" <td>[[professor of constitutional law, university ...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2013-03-25 14:38:06.016000+00:00</td>\n",
" <td>2020-07-01 13:10:37.025000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unioviedo.es]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0000-0003-1435-6545</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[migration, culture cell, prostate cancer]</td>\n",
" <td>[[researcherid, p-2223-2018]]</td>\n",
" <td>[[morfologia, , universidade estadual paulista...</td>\n",
" <td>[[, universidade estadual paulista (unesp), in...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-08-09 12:12:24.405000+00:00</td>\n",
" <td>2020-04-22 01:38:03.184000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, linkedin.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>0000-0003-1284-9741</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>alex percy antonio</td>\n",
" <td>manriquez paisig</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-08 20:04:33.906000+00:00</td>\n",
" <td>2020-09-08 20:25:55.432000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"6 0000-0001-7402-0096 True True \n",
"11 0000-0001-8377-3508 True True \n",
"29 0000-0002-2638-4108 True True \n",
"46 0000-0003-1435-6545 True True \n",
"158 0000-0003-1284-9741 True True \n",
"\n",
" given_names family_name \\\n",
"6 <NA> <NA> \n",
"11 <NA> <NA> \n",
"29 <NA> <NA> \n",
"46 <NA> <NA> \n",
"158 alex percy antonio manriquez paisig \n",
"\n",
" biography \\\n",
"6 <NA> \n",
"11 <NA> \n",
"29 investigador de la universidad de oviedo. depa... \n",
"46 <NA> \n",
"158 <NA> \n",
"\n",
" other_names primary_email \\\n",
"6 NaN <NA> \n",
"11 [fontana, milena da silva] <NA> \n",
"29 NaN <NA> \n",
"46 NaN <NA> \n",
"158 NaN <NA> \n",
"\n",
" keywords \\\n",
"6 NaN \n",
"11 [educação; informática; matemática.] \n",
"29 [history of political thought, constitutional ... \n",
"46 [migration, culture cell, prostate cancer] \n",
"158 NaN \n",
"\n",
" external_ids \\\n",
"6 NaN \n",
"11 NaN \n",
"29 [[scopus author id, 54394231000]] \n",
"46 [[researcherid, p-2223-2018]] \n",
"158 NaN \n",
"\n",
" education \\\n",
"6 NaN \n",
"11 NaN \n",
"29 [[public law, ph doctor, university of oviedo,... \n",
"46 [[morfologia, , universidade estadual paulista... \n",
"158 NaN \n",
"\n",
" employment n_works works_source \\\n",
"6 [[, kth royal institute of technology, stockho... 0 NaN \n",
"11 [[, instituto federal de educação, ciência e t... 0 NaN \n",
"29 [[professor of constitutional law, university ... 1 [crossref] \n",
"46 [[, universidade estadual paulista (unesp), in... 0 NaN \n",
"158 NaN 0 NaN \n",
"\n",
" activation_date last_update_date n_doi \\\n",
"6 2015-01-11 15:13:06.467000+00:00 2016-06-14 23:55:59.896000+00:00 0 \n",
"11 2018-05-23 23:39:04.534000+00:00 2019-10-16 02:50:11.007000+00:00 0 \n",
"29 2013-03-25 14:38:06.016000+00:00 2020-07-01 13:10:37.025000+00:00 1 \n",
"46 2018-08-09 12:12:24.405000+00:00 2020-04-22 01:38:03.184000+00:00 0 \n",
"158 2020-09-08 20:04:33.906000+00:00 2020-09-08 20:25:55.432000+00:00 0 \n",
"\n",
" n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"6 0 0 0 False NaN \n",
"11 0 0 0 False NaN \n",
"29 0 0 0 False NaN \n",
"46 0 0 0 False NaN \n",
"158 0 0 0 False NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids \\\n",
"6 NaN [kth.se] <NA> 1 <NA> \n",
"11 NaN [cnpq.br] <NA> 1 <NA> \n",
"29 NaN [unioviedo.es] <NA> 1 1 \n",
"46 NaN [cnpq.br, linkedin.com] <NA> 2 1 \n",
"158 NaN [youtube.com] <NA> 1 <NA> \n",
"\n",
" n_keywords n_education n_employment \n",
"6 <NA> <NA> 1 \n",
"11 1 <NA> 3 \n",
"29 3 1 1 \n",
"46 3 1 1 \n",
"158 <NA> <NA> <NA> "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.url_domains.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3226518</th>\n",
" <td>0000-0002-1234-835X</td>\n",
" <td>219</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4206055</th>\n",
" <td>0000-0001-7478-4539</td>\n",
" <td>174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4901870</th>\n",
" <td>0000-0002-7392-3792</td>\n",
" <td>169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8184260</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>152</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2743648</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989646</th>\n",
" <td>0000-0002-8783-5814</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989648</th>\n",
" <td>0000-0003-0529-3538</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10989649 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_urls\n",
"3226518 0000-0002-1234-835X 219\n",
"4206055 0000-0001-7478-4539 174\n",
"4901870 0000-0002-7392-3792 169\n",
"8184260 0000-0002-6938-9638 152\n",
"2743648 0000-0002-5710-4041 114\n",
"... ... ...\n",
"10989644 0000-0002-1686-1935 <NA>\n",
"10989645 0000-0002-3800-6331 <NA>\n",
"10989646 0000-0002-8783-5814 <NA>\n",
"10989647 0000-0002-7584-2283 <NA>\n",
"10989648 0000-0003-0529-3538 <NA>\n",
"\n",
"[10989649 rows x 2 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"urls_by_orcid = df[['orcid', 'n_urls']].sort_values('n_urls', ascending=False)\n",
"urls_by_orcid"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The first three are fake, the fourth isn't. No assumption can be taken."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-1234-835X",
"0000-0001-7478-4539",
"0000-0002-7392-3792",
"0000-0002-6938-9638",
"0000-0002-5710-4041",
"0000-0003-2450-090X",
"0000-0002-3920-7389",
"0000-0002-6689-4129",
"0000-0001-5384-0001",
"0000-0002-4621-5571",
"0000-0001-9131-1266",
"0000-0002-7754-8889",
"0000-0002-5250-1144",
"0000-0002-9025-8632",
"0000-0002-7456-3848",
"0000-0003-0176-1293",
"0000-0003-0321-7339",
"0000-0002-8493-0402",
"0000-0002-9965-2425",
"0000-0001-8873-6677",
"0000-0002-3997-5070",
"0000-0002-1856-6905",
"0000-0002-4316-1467",
"0000-0002-4062-3603",
"0000-0003-0594-2462",
"0000-0001-5880-7091",
"0000-0003-1524-6268",
"0000-0002-0752-7513",
"0000-0003-2593-7134",
"0000-0002-1298-5252",
"0000-0003-1761-3842",
"0000-0003-2383-8386",
"0000-0003-3546-2312",
"0000-0002-2886-9248",
"0000-0003-2183-8112",
"0000-0002-1929-6054",
"0000-0003-4948-9268",
"0000-0003-2407-3557",
"0000-0002-9276-6921",
"0000-0003-1484-6958",
"0000-0002-7568-3403",
"0000-0002-4305-4215",
"0000-0002-4004-6666",
"0000-0003-0796-0234",
"0000-0001-7133-6896",
"0000-0002-8208-0897",
"0000-0002-9071-5450",
"0000-0003-4993-5555",
"0000-0003-0930-6121",
"0000-0002-8116-9611",
"0000-0002-5139-2660",
"0000-0002-3277-9659",
"0000-0002-8122-879X",
"0000-0001-9559-1103",
"0000-0003-2862-6315",
"0000-0002-2000-8339",
"0000-0001-5300-4601",
"0000-0002-6254-8683",
"0000-0002-6547-0172",
"0000-0003-4808-6619",
"0000-0003-3933-0229",
"0000-0002-0971-9375",
"0000-0003-0694-1154",
"0000-0003-1585-1134",
"0000-0002-4659-5391",
"0000-0002-2916-2893",
"0000-0001-6783-2037",
"0000-0001-6461-2573",
"0000-0003-4501-3756",
"0000-0001-5549-6822",
"0000-0002-8940-3177",
"0000-0003-4326-9336",
"0000-0001-8096-4333",
"0000-0001-8978-4830",
"0000-0002-5946-1595",
"0000-0002-6680-1703",
"0000-0002-8593-9257",
"0000-0002-7653-4899",
"0000-0003-1904-4188",
"0000-0002-5196-4905",
"0000-0001-8808-4867",
"0000-0001-6921-0426",
"0000-0003-1815-1993",
"0000-0002-7843-8497",
"0000-0003-1675-2840",
"0000-0001-8644-2114",
"0000-0003-0907-9870",
"0000-0001-7784-0583",
"0000-0001-7550-5802",
"0000-0001-8986-2528",
"0000-0002-5265-6074",
"0000-0001-9102-8639",
"0000-0002-0696-8560",
"0000-0001-6979-4273",
"0000-0002-7179-6953",
"0000-0002-3334-9386",
"0000-0001-6714-009X",
"0000-0001-7193-5039",
"0000-0002-5241-1026",
"0000-0001-7608-9433"
],
"y": [
219,
174,
169,
152,
114,
114,
111,
104,
104,
90,
83,
83,
81,
81,
80,
80,
80,
76,
73,
72,
71,
70,
69,
69,
68,
68,
68,
68,
67,
67,
66,
66,
65,
64,
61,
61,
61,
59,
57,
57,
57,
57,
57,
57,
57,
56,
55,
55,
55,
55,
51,
50,
50,
50,
49,
49,
48,
48,
48,
48,
47,
47,
46,
46,
46,
45,
45,
45,
45,
44,
43,
43,
43,
43,
42,
42,
42,
41,
41,
41,
40,
40,
39,
39,
39,
39,
38,
38,
38,
38,
38,
37,
37,
37,
37,
37,
36,
36,
36,
36
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 100 ORCID iDs with URLs"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"4089b308-732c-48f6-aa30-b2f71ac024f5\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"4089b308-732c-48f6-aa30-b2f71ac024f5\")) { Plotly.newPlot( \"4089b308-732c-48f6-aa30-b2f71ac024f5\", [{\"type\": \"bar\", \"x\": [\"0000-0002-1234-835X\", \"0000-0001-7478-4539\", \"0000-0002-7392-3792\", \"0000-0002-6938-9638\", \"0000-0002-5710-4041\", \"0000-0003-2450-090X\", \"0000-0002-3920-7389\", \"0000-0002-6689-4129\", \"0000-0001-5384-0001\", \"0000-0002-4621-5571\", \"0000-0001-9131-1266\", \"0000-0002-7754-8889\", \"0000-0002-5250-1144\", \"0000-0002-9025-8632\", \"0000-0002-7456-3848\", \"0000-0003-0176-1293\", \"0000-0003-0321-7339\", \"0000-0002-8493-0402\", \"0000-0002-9965-2425\", \"0000-0001-8873-6677\", \"0000-0002-3997-5070\", \"0000-0002-1856-6905\", \"0000-0002-4316-1467\", \"0000-0002-4062-3603\", \"0000-0003-0594-2462\", \"0000-0001-5880-7091\", \"0000-0003-1524-6268\", \"0000-0002-0752-7513\", \"0000-0003-2593-7134\", \"0000-0002-1298-5252\", \"0000-0003-1761-3842\", \"0000-0003-2383-8386\", \"0000-0003-3546-2312\", \"0000-0002-2886-9248\", \"0000-0003-2183-8112\", \"0000-0002-1929-6054\", \"0000-0003-4948-9268\", \"0000-0003-2407-3557\", \"0000-0002-9276-6921\", \"0000-0003-1484-6958\", \"0000-0002-7568-3403\", \"0000-0002-4305-4215\", \"0000-0002-4004-6666\", \"0000-0003-0796-0234\", \"0000-0001-7133-6896\", \"0000-0002-8208-0897\", \"0000-0002-9071-5450\", \"0000-0003-4993-5555\", \"0000-0003-0930-6121\", \"0000-0002-8116-9611\", \"0000-0002-5139-2660\", \"0000-0002-3277-9659\", \"0000-0002-8122-879X\", \"0000-0001-9559-1103\", \"0000-0003-2862-6315\", \"0000-0002-2000-8339\", \"0000-0001-5300-4601\", \"0000-0002-6254-8683\", \"0000-0002-6547-0172\", \"0000-0003-4808-6619\", \"0000-0003-3933-0229\", \"0000-0002-0971-9375\", \"0000-0003-0694-1154\", 
\"0000-0003-1585-1134\", \"0000-0002-4659-5391\", \"0000-0002-2916-2893\", \"0000-0001-6783-2037\", \"0000-0001-6461-2573\", \"0000-0003-4501-3756\", \"0000-0001-5549-6822\", \"0000-0002-8940-3177\", \"0000-0003-4326-9336\", \"0000-0001-8096-4333\", \"0000-0001-8978-4830\", \"0000-0002-5946-1595\", \"0000-0002-6680-1703\", \"0000-0002-8593-9257\", \"0000-0002-7653-4899\", \"0000-0003-1904-4188\", \"0000-0002-5196-4905\", \"0000-0001-8808-4867\", \"0000-0001-6921-0426\", \"0000-0003-1815-1993\", \"0000-0002-7843-8497\", \"0000-0003-1675-2840\", \"0000-0001-8644-2114\", \"0000-0003-0907-9870\", \"0000-0001-7784-0583\", \"0000-0001-7550-5802\", \"0000-0001-8986-2528\", \"0000-0002-5265-6074\", \"0000-0001-9102-8639\", \"0000-0002-0696-8560\", \"0000-0001-6979-4273\", \"0000-0002-7179-6953\", \"0000-0002-3334-9386\", \"0000-0001-6714-009X\", \"0000-0001-7193-5039\", \"0000-0002-5241-1026\", \"0000-0001-7608-9433\"], \"y\": [219, 174, 169, 152, 114, 114, 111, 104, 104, 90, 83, 83, 81, 81, 80, 80, 80, 76, 73, 72, 71, 70, 69, 69, 68, 68, 68, 68, 67, 67, 66, 66, 65, 64, 61, 61, 61, 59, 57, 57, 57, 57, 57, 57, 57, 56, 55, 55, 55, 55, 51, 50, 50, 50, 49, 49, 48, 48, 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37, 37, 36, 36, 36, 36]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, 
\"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], 
[0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": 
[[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": 
true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 100 ORCID iDs with URLs\"}, \"xaxis\": {\"range\": [-0.5, 99.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('4089b308-732c-48f6-aa30-b2f71ac024f5');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(100)\n",
"data = [\n",
" go.Bar(\n",
" x=urls_by_orcid[:TOP_N]['orcid'],\n",
" y=urls_by_orcid[:TOP_N]['n_urls']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s ORCID iDs with URLs' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"top_urls = df[['orcid', 'url_domains']]\\\n",
" .explode('url_domains')\\\n",
" .reset_index(drop=True)\\\n",
" .groupby('url_domains')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"linkedin.com",
"researchgate.net",
"google.com",
"cnpq.br",
"academia.edu",
"twitter.com",
"facebook.com",
"publons.com",
"wordpress.com",
"mendeley.com",
"instagram.com",
"github.io",
"google.com.ua",
"blogspot.com",
"github.com",
"google.es",
"helsinki.fi",
"unirioja.es",
"youtube.com",
"wixsite.com",
"ku.dk",
"scopus.com",
"weebly.com",
"us.es",
"kth.se",
"cityu.edu.hk",
"au.dk",
"kcl.ac.uk",
"man.ac.uk",
"google.com.au",
"ucl.ac.uk",
"sdu.dk",
"ugr.es",
"researcherid.com",
"mq.edu.au",
"ntu.edu.tw",
"dtu.dk",
"rug.nl",
"colciencias.gov.co",
"google.co.in",
"bris.ac.uk",
"uwa.edu.au",
"uc3m.es",
"bu.edu",
"vub.be",
"monash.edu",
"google.co.uk",
"aau.dk",
"lancs.ac.uk",
"tumblr.com"
],
"y": [
78418,
67823,
44804,
24635,
21174,
19046,
15368,
10751,
9043,
6960,
6040,
5516,
5371,
5272,
5252,
5163,
4730,
4590,
4470,
4140,
3771,
3586,
3122,
3037,
2957,
2795,
2746,
2724,
2689,
2610,
2586,
2478,
2231,
2134,
2133,
2094,
2002,
1975,
1929,
1917,
1840,
1820,
1804,
1803,
1803,
1772,
1656,
1653,
1650,
1646
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-50 URL domains"
},
"xaxis": {
"range": [
-0.5,
49.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"361fb694-28b4-4e74-92cc-74d53f51b8a7\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"361fb694-28b4-4e74-92cc-74d53f51b8a7\")) { Plotly.newPlot( \"361fb694-28b4-4e74-92cc-74d53f51b8a7\", [{\"type\": \"bar\", \"x\": [\"linkedin.com\", \"researchgate.net\", \"google.com\", \"cnpq.br\", \"academia.edu\", \"twitter.com\", \"facebook.com\", \"publons.com\", \"wordpress.com\", \"mendeley.com\", \"instagram.com\", \"github.io\", \"google.com.ua\", \"blogspot.com\", \"github.com\", \"google.es\", \"helsinki.fi\", \"unirioja.es\", \"youtube.com\", \"wixsite.com\", \"ku.dk\", \"scopus.com\", \"weebly.com\", \"us.es\", \"kth.se\", \"cityu.edu.hk\", \"au.dk\", \"kcl.ac.uk\", \"man.ac.uk\", \"google.com.au\", \"ucl.ac.uk\", \"sdu.dk\", \"ugr.es\", \"researcherid.com\", \"mq.edu.au\", \"ntu.edu.tw\", \"dtu.dk\", \"rug.nl\", \"colciencias.gov.co\", \"google.co.in\", \"bris.ac.uk\", \"uwa.edu.au\", \"uc3m.es\", \"bu.edu\", \"vub.be\", \"monash.edu\", \"google.co.uk\", \"aau.dk\", \"lancs.ac.uk\", \"tumblr.com\"], \"y\": [78418, 67823, 44804, 24635, 21174, 19046, 15368, 10751, 9043, 6960, 6040, 5516, 5371, 5272, 5252, 5163, 4730, 4590, 4470, 4140, 3771, 3586, 3122, 3037, 2957, 2795, 2746, 2724, 2689, 2610, 2586, 2478, 2231, 2134, 2133, 2094, 2002, 1975, 1929, 1917, 1840, 1820, 1804, 1803, 1803, 1772, 1656, 1653, 1650, 1646]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": 
\"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], 
[0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": 
{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", 
\"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top-50 URL domains\"}, \"xaxis\": {\"range\": [-0.5, 49.5], 
\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('361fb694-28b4-4e74-92cc-74d53f51b8a7');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart over the first TOP_N rows of top_urls:\n",
"# row index on the x axis, the row's 'orcid' value on the y axis.\n",
"# set_top_n is defined earlier in the notebook; TOP_N and TOP_RANGE are read after calling it.\n",
"set_top_n(50)\n",
"top_url_rows = top_urls[:TOP_N]\n",
"\n",
"data = [go.Bar(x=top_url_rows.index, y=top_url_rows['orcid'])]\n",
"\n",
"layout = go.Layout(\n",
"    title='Top-%s URL domains' % TOP_N,\n",
"    xaxis={\n",
"        'tickangle': 45,\n",
"        'tickfont': {'size': 12},\n",
"        'range': TOP_RANGE,\n",
"    },\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
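"Malformed URLs are left empty, i.e. their entry in `url_domains` is the empty string `''`. The two cells below count how many such entries there are."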
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the ORCID and its URL domains, then unpack url_domains so that\n",
"# every list element gets its own row.\n",
"exploded_url_domains = df.loc[:, ['orcid', 'url_domains']].explode(column='url_domains')"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"orcid 71\n",
"url_domains 71\n",
"dtype: int64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Non-null counts per column for the exploded rows whose domain is the empty string.\n",
"exploded_url_domains.loc[exploded_url_domains['url_domains'] == ''].count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs speculation"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1025713</th>\n",
" <td>0000-0003-2407-3557</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>abdul</td>\n",
" <td>aziz</td>\n",
" <td>abdul aziz was born on may 25, 1973, in brebes...</td>\n",
" <td>[abdul aziz, aziz, abdul, aziz, a., aziz, abd,...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[ilmu ekonomi, ekonomi islam, etika bisnis isl...</td>\n",
" <td>NaN</td>\n",
" <td>[[ilmu ekonomi, dr, universitas borobudur, jak...</td>\n",
" <td>[[assisten professor/dr, institut agama islam ...</td>\n",
" <td>72</td>\n",
" <td>[base - bielefeld academic search engine, abdu...</td>\n",
" <td>2016-09-12 04:41:24.842000+00:00</td>\n",
" <td>2021-01-26 11:58:33.039000+00:00</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>77</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, syekhnurjati.ac.id, orcid.org, bl...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>59</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2743648</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>ryszard</td>\n",
" <td>romaniuk</td>\n",
" <td>professor of electronics and communications en...</td>\n",
" <td>[r.romaniuk, r.s.romaniuk, ryszard romaniuk, r...</td>\n",
" <td>rrom@ise.pw.edu.pl</td>\n",
" <td>[research systems, measurement systems, photon...</td>\n",
" <td>[[isni, 0000000071432485], [researcherid, b-91...</td>\n",
" <td>[[faculty of electronics and information techn...</td>\n",
" <td>[[professor, institute director, politechnika ...</td>\n",
" <td>5008</td>\n",
" <td>[inspire-hep, researcherid, isni2orcid search ...</td>\n",
" <td>2013-01-20 12:09:21.600000+00:00</td>\n",
" <td>2021-03-16 19:37:31.650000+00:00</td>\n",
" <td>1221</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>1742</td>\n",
" <td>True</td>\n",
" <td>ise.pw.edu.pl</td>\n",
" <td>[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]</td>\n",
" <td>[google.pl, publons.com, scopus.com, mendeley....</td>\n",
" <td>3</td>\n",
" <td>114</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3011724</th>\n",
" <td>0000-0003-2450-090X</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>eduard</td>\n",
" <td>babulak</td>\n",
" <td>professor eduard babulak is accomplished inter...</td>\n",
" <td>[professor eduard babulak]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[internet of things, next generation of ict an...</td>\n",
" <td>[[scopus author id, 6506867432], [researcherid...</td>\n",
" <td>[[information technology, doctor habilitated (...</td>\n",
" <td>[[consultant, horizon 2020 framework programme...</td>\n",
" <td>274</td>\n",
" <td>[the lens, base - bielefeld academic search en...</td>\n",
" <td>2013-04-03 08:02:30.013000+00:00</td>\n",
" <td>2021-02-28 10:07:13.231000+00:00</td>\n",
" <td>199</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>174</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[worldassessmentcouncil.org, spseke.sk, bcs.or...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>114</td>\n",
" <td>5</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3881064</th>\n",
" <td>0000-0002-3920-7389</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>а.</td>\n",
" <td>гусев</td>\n",
" <td>surname, name gusev alexander leonidovichdate...</td>\n",
" <td>[alexander l. gusev , alexander leonidovich gu...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[nanomaterials and nanocomposites, supercapaci...</td>\n",
" <td>[[researcherid, f-8048-2014], [scopus author i...</td>\n",
" <td>[[chemical technology and cryogenic-vacuum tec...</td>\n",
" <td>[[general director, scientific technical centr...</td>\n",
" <td>472</td>\n",
" <td>[publons, datacite, scopus - elsevier, a.l. gu...</td>\n",
" <td>2014-05-14 00:01:28.030000+00:00</td>\n",
" <td>2021-01-16 13:44:14.134000+00:00</td>\n",
" <td>37</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>21</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com, isjaee.com, researchgate.net, re...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>111</td>\n",
" <td>2</td>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7466062</th>\n",
" <td>0000-0002-1929-6054</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>franklin américo</td>\n",
" <td>canaza choque</td>\n",
" <td>docente-investigador social. maestrando en der...</td>\n",
" <td>[franklin américo canaza-choque , franklin a. ...</td>\n",
" <td>leo_123fa@hotmail.com</td>\n",
" <td>[filosofía; educación; políticas de desarrollo...</td>\n",
" <td>[[researcherid, p-8613-2018], [loop profile, 8...</td>\n",
" <td>[[facultad de ciencias de la educación , maest...</td>\n",
" <td>[[investigador social, universidad católica de...</td>\n",
" <td>39</td>\n",
" <td>[researcherid, base - bielefeld academic searc...</td>\n",
" <td>2017-09-15 19:45:43.483000+00:00</td>\n",
" <td>2021-03-23 20:12:47.297000+00:00</td>\n",
" <td>30</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>True</td>\n",
" <td>hotmail.com</td>\n",
" <td>[gmail.com, gmail.com, hotmail.com, baldwin.ed...</td>\n",
" <td>[concytec.gob.pe, redalyc.org, redalyc.org, un...</td>\n",
" <td>5</td>\n",
" <td>61</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7517096</th>\n",
" <td>0000-0003-4948-9268</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>gustavo</td>\n",
" <td>duperré</td>\n",
" <td>gustavo norberto duperré graduated in arts and...</td>\n",
" <td>[gustavo norberto duperré, duperré, g. n., gus...</td>\n",
" <td>gustavo.duperre@usal.edu.ar</td>\n",
" <td>[sciences of antiquity, cultural heritage, ear...</td>\n",
" <td>[[scopus author id, 57195936346], [researcheri...</td>\n",
" <td>[[programme in history, history of art and ter...</td>\n",
" <td>[[titular professor, dirección general de cult...</td>\n",
" <td>41</td>\n",
" <td>[gustavo duperré, scopus - elsevier, publons, ...</td>\n",
" <td>2020-02-22 15:49:52.386000+00:00</td>\n",
" <td>2021-03-12 15:13:44.065000+00:00</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>False</td>\n",
" <td>usal.edu.ar</td>\n",
" <td>NaN</td>\n",
" <td>[icomos.ro, unirioja.es, unirioja.es, unc.edu....</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>61</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8068275</th>\n",
" <td>0000-0003-2183-8112</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>pelayo munhoz</td>\n",
" <td>olea</td>\n",
" <td>pós-doutorado em gestão ambiental pela univers...</td>\n",
" <td>[ munhoz, pelayo olea, olea, pelayo, olea, p...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[inovação, sustentabilidade, empreendedorismo]</td>\n",
" <td>[[scopus author id, 55175503300], [researcheri...</td>\n",
" <td>[[, postdoctoral in environmental sustainabili...</td>\n",
" <td>[[professor, universidade federal do rio grand...</td>\n",
" <td>1109</td>\n",
" <td>[the lens, pelayo munhoz olea, dimensions, bas...</td>\n",
" <td>2013-02-04 17:25:34.723000+00:00</td>\n",
" <td>2021-03-19 18:51:01.128000+00:00</td>\n",
" <td>798</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>582</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>61</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8184260</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>adolfo</td>\n",
" <td>catral sanabria</td>\n",
" <td>my education is in computer science, mathemati...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[loop profile, 747193]]</td>\n",
" <td>[[education, capacitación para la enseñanza en...</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" <td>[base - bielefeld academic search engine, data...</td>\n",
" <td>2019-05-07 19:27:02.210000+00:00</td>\n",
" <td>2020-12-10 23:39:15.236000+00:00</td>\n",
" <td>2022</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[researchgate.net, youtube.com, linkedin.com, ...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>152</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>6</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8791256</th>\n",
" <td>0000-0002-9025-8632</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>buycannabis</td>\n",
" <td>dispensary</td>\n",
" <td>we procure and deliver premium cannabis strain...</td>\n",
" <td>[we procure and deliver premium cannabis strai...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[marijuana, canabis dispensary, marijuana stoc...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" <td>[goowonderland dispensary]</td>\n",
" <td>2020-12-09 21:19:46.004000+00:00</td>\n",
" <td>2020-12-10 01:17:28.772000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[goowonderland.com, goowonderland.com, goowond...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>81</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>7</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10174509</th>\n",
" <td>0000-0002-9965-2425</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>jaroslaw</td>\n",
" <td>spychala</td>\n",
" <td>jaroslaw spychala has received a doctoral degr...</td>\n",
" <td>[jaroslaw jozef spychala]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[organic chemistry, medicinal and pharmaceutic...</td>\n",
" <td>[[scopus author id, 7006745874]]</td>\n",
" <td>[[department of chemistry, postdoctoral associ...</td>\n",
" <td>[[assistant professor, adam mickiewicz univers...</td>\n",
" <td>29</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2014-09-18 12:34:14.242000+00:00</td>\n",
" <td>2020-02-11 14:31:25.544000+00:00</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biowebspin.com, biowebspin.com, google.com, l...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>73</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10257808</th>\n",
" <td>0000-0002-4062-3603</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>juan de dios</td>\n",
" <td>beltrán mancilla</td>\n",
" <td>juan de dios beltrán mancilla (*) filósofo aut...</td>\n",
" <td>[juan de dios beltrán mancilla, filósofo autod...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[filosofia medicina arquitectura economía dere...</td>\n",
" <td>NaN</td>\n",
" <td>[[, diplomado en practicas directivas para or...</td>\n",
" <td>[[inspector general jornada vespertina // de 2...</td>\n",
" <td>11</td>\n",
" <td>[juan de dios beltr´´án mancilla]</td>\n",
" <td>2020-04-19 21:06:33.495000+00:00</td>\n",
" <td>2021-02-10 20:13:07.698000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[yumpu.com, ijopm.org, google.com, blogspot.co...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>69</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10486212</th>\n",
" <td>0000-0002-3997-5070</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>dr. parameshachari</td>\n",
" <td>b d</td>\n",
" <td>dr. parameshachari b dacm distinguished speake...</td>\n",
" <td>[dr. parameshachari b d]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[honorary secretary| iete mysuru centre, profe...</td>\n",
" <td>[[researcherid, f-7045-2018], [scopus author i...</td>\n",
" <td>[[electronics and communication engineering, p...</td>\n",
" <td>[[acm distinguished speaker (volunteer), assoc...</td>\n",
" <td>93</td>\n",
" <td>[publons, multidisciplinary digital publishing...</td>\n",
" <td>2016-08-24 11:00:30.403000+00:00</td>\n",
" <td>2021-03-23 07:16:22.582000+00:00</td>\n",
" <td>47</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>48</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[geethashishu.in, geethashishu.in, acm.org, go...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>71</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10652632</th>\n",
" <td>0000-0003-2593-7134</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>aan</td>\n",
" <td>jaelani</td>\n",
" <td>all my papers can be downloaded from portal:re...</td>\n",
" <td>[jaelani, a., jaelani, aan]</td>\n",
" <td>aan_jaelani@syekhnurjati.ac.id</td>\n",
" <td>[tourism industry, public finance &amp; budgeting,...</td>\n",
" <td>[[scopus author id, 57195963463], [loop profil...</td>\n",
" <td>[[post graduate, s3/dr, universitas islam nege...</td>\n",
" <td>[[dr, institut agama islam negeri syekh nurjat...</td>\n",
" <td>79</td>\n",
" <td>[publons, aan jaelani, scopus - elsevier, dime...</td>\n",
" <td>2016-03-02 18:37:44.989000+00:00</td>\n",
" <td>2021-03-19 10:11:57.908000+00:00</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>193</td>\n",
" <td>True</td>\n",
" <td>syekhnurjati.ac.id</td>\n",
" <td>[gmail.com]</td>\n",
" <td>[microsoft.com, twitter.com, academia.edu, aca...</td>\n",
" <td>1</td>\n",
" <td>67</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"1025713 0000-0003-2407-3557 True True \n",
"2743648 0000-0002-5710-4041 True True \n",
"3011724 0000-0003-2450-090X True True \n",
"3881064 0000-0002-3920-7389 True True \n",
"7466062 0000-0002-1929-6054 True True \n",
"7517096 0000-0003-4948-9268 True True \n",
"8068275 0000-0003-2183-8112 True True \n",
"8184260 0000-0002-6938-9638 True True \n",
"8791256 0000-0002-9025-8632 True True \n",
"10174509 0000-0002-9965-2425 True True \n",
"10257808 0000-0002-4062-3603 True True \n",
"10486212 0000-0002-3997-5070 True True \n",
"10652632 0000-0003-2593-7134 True True \n",
"\n",
" given_names family_name \\\n",
"1025713 abdul aziz \n",
"2743648 ryszard romaniuk \n",
"3011724 eduard babulak \n",
"3881064 а. гусев \n",
"7466062 franklin américo canaza choque \n",
"7517096 gustavo duperré \n",
"8068275 pelayo munhoz olea \n",
"8184260 adolfo catral sanabria \n",
"8791256 buycannabis dispensary \n",
"10174509 jaroslaw spychala \n",
"10257808 juan de dios beltrán mancilla \n",
"10486212 dr. parameshachari b d \n",
"10652632 aan jaelani \n",
"\n",
" biography \\\n",
"1025713 abdul aziz was born on may 25, 1973, in brebes... \n",
"2743648 professor of electronics and communications en... \n",
"3011724 professor eduard babulak is accomplished inter... \n",
"3881064 surname, name gusev alexander leonidovichdate... \n",
"7466062 docente-investigador social. maestrando en der... \n",
"7517096 gustavo norberto duperré graduated in arts and... \n",
"8068275 pós-doutorado em gestão ambiental pela univers... \n",
"8184260 my education is in computer science, mathemati... \n",
"8791256 we procure and deliver premium cannabis strain... \n",
"10174509 jaroslaw spychala has received a doctoral degr... \n",
"10257808 juan de dios beltrán mancilla (*) filósofo aut... \n",
"10486212 dr. parameshachari b dacm distinguished speake... \n",
"10652632 all my papers can be downloaded from portal:re... \n",
"\n",
" other_names \\\n",
"1025713 [abdul aziz, aziz, abdul, aziz, a., aziz, abd,... \n",
"2743648 [r.romaniuk, r.s.romaniuk, ryszard romaniuk, r... \n",
"3011724 [professor eduard babulak] \n",
"3881064 [alexander l. gusev , alexander leonidovich gu... \n",
"7466062 [franklin américo canaza-choque , franklin a. ... \n",
"7517096 [gustavo norberto duperré, duperré, g. n., gus... \n",
"8068275 [ munhoz, pelayo olea, olea, pelayo, olea, p... \n",
"8184260 NaN \n",
"8791256 [we procure and deliver premium cannabis strai... \n",
"10174509 [jaroslaw jozef spychala] \n",
"10257808 [juan de dios beltrán mancilla, filósofo autod... \n",
"10486212 [dr. parameshachari b d] \n",
"10652632 [jaelani, a., jaelani, aan] \n",
"\n",
" primary_email \\\n",
"1025713 <NA> \n",
"2743648 rrom@ise.pw.edu.pl \n",
"3011724 <NA> \n",
"3881064 <NA> \n",
"7466062 leo_123fa@hotmail.com \n",
"7517096 gustavo.duperre@usal.edu.ar \n",
"8068275 <NA> \n",
"8184260 <NA> \n",
"8791256 <NA> \n",
"10174509 <NA> \n",
"10257808 <NA> \n",
"10486212 <NA> \n",
"10652632 aan_jaelani@syekhnurjati.ac.id \n",
"\n",
" keywords \\\n",
"1025713 [ilmu ekonomi, ekonomi islam, etika bisnis isl... \n",
"2743648 [research systems, measurement systems, photon... \n",
"3011724 [internet of things, next generation of ict an... \n",
"3881064 [nanomaterials and nanocomposites, supercapaci... \n",
"7466062 [filosofía; educación; políticas de desarrollo... \n",
"7517096 [sciences of antiquity, cultural heritage, ear... \n",
"8068275 [inovação, sustentabilidade, empreendedorismo] \n",
"8184260 NaN \n",
"8791256 [marijuana, canabis dispensary, marijuana stoc... \n",
"10174509 [organic chemistry, medicinal and pharmaceutic... \n",
"10257808 [filosofia medicina arquitectura economía dere... \n",
"10486212 [honorary secretary| iete mysuru centre, profe... \n",
"10652632 [tourism industry, public finance & budgeting,... \n",
"\n",
" external_ids \\\n",
"1025713 NaN \n",
"2743648 [[isni, 0000000071432485], [researcherid, b-91... \n",
"3011724 [[scopus author id, 6506867432], [researcherid... \n",
"3881064 [[researcherid, f-8048-2014], [scopus author i... \n",
"7466062 [[researcherid, p-8613-2018], [loop profile, 8... \n",
"7517096 [[scopus author id, 57195936346], [researcheri... \n",
"8068275 [[scopus author id, 55175503300], [researcheri... \n",
"8184260 [[loop profile, 747193]] \n",
"8791256 NaN \n",
"10174509 [[scopus author id, 7006745874]] \n",
"10257808 NaN \n",
"10486212 [[researcherid, f-7045-2018], [scopus author i... \n",
"10652632 [[scopus author id, 57195963463], [loop profil... \n",
"\n",
" education \\\n",
"1025713 [[ilmu ekonomi, dr, universitas borobudur, jak... \n",
"2743648 [[faculty of electronics and information techn... \n",
"3011724 [[information technology, doctor habilitated (... \n",
"3881064 [[chemical technology and cryogenic-vacuum tec... \n",
"7466062 [[facultad de ciencias de la educación , maest... \n",
"7517096 [[programme in history, history of art and ter... \n",
"8068275 [[, postdoctoral in environmental sustainabili... \n",
"8184260 [[education, capacitación para la enseñanza en... \n",
"8791256 NaN \n",
"10174509 [[department of chemistry, postdoctoral associ... \n",
"10257808 [[, diplomado en practicas directivas para or... \n",
"10486212 [[electronics and communication engineering, p... \n",
"10652632 [[post graduate, s3/dr, universitas islam nege... \n",
"\n",
" employment n_works \\\n",
"1025713 [[assisten professor/dr, institut agama islam ... 72 \n",
"2743648 [[professor, institute director, politechnika ... 5008 \n",
"3011724 [[consultant, horizon 2020 framework programme... 274 \n",
"3881064 [[general director, scientific technical centr... 472 \n",
"7466062 [[investigador social, universidad católica de... 39 \n",
"7517096 [[titular professor, dirección general de cult... 41 \n",
"8068275 [[professor, universidade federal do rio grand... 1109 \n",
"8184260 NaN 2023 \n",
"8791256 NaN 10 \n",
"10174509 [[assistant professor, adam mickiewicz univers... 29 \n",
"10257808 [[inspector general jornada vespertina // de 2... 11 \n",
"10486212 [[acm distinguished speaker (volunteer), assoc... 93 \n",
"10652632 [[dr, institut agama islam negeri syekh nurjat... 79 \n",
"\n",
" works_source \\\n",
"1025713 [base - bielefeld academic search engine, abdu... \n",
"2743648 [inspire-hep, researcherid, isni2orcid search ... \n",
"3011724 [the lens, base - bielefeld academic search en... \n",
"3881064 [publons, datacite, scopus - elsevier, a.l. gu... \n",
"7466062 [researcherid, base - bielefeld academic searc... \n",
"7517096 [gustavo duperré, scopus - elsevier, publons, ... \n",
"8068275 [the lens, pelayo munhoz olea, dimensions, bas... \n",
"8184260 [base - bielefeld academic search engine, data... \n",
"8791256 [goowonderland dispensary] \n",
"10174509 [scopus - elsevier] \n",
"10257808 [juan de dios beltr´´án mancilla] \n",
"10486212 [publons, multidisciplinary digital publishing... \n",
"10652632 [publons, aan jaelani, scopus - elsevier, dime... \n",
"\n",
" activation_date last_update_date \\\n",
"1025713 2016-09-12 04:41:24.842000+00:00 2021-01-26 11:58:33.039000+00:00 \n",
"2743648 2013-01-20 12:09:21.600000+00:00 2021-03-16 19:37:31.650000+00:00 \n",
"3011724 2013-04-03 08:02:30.013000+00:00 2021-02-28 10:07:13.231000+00:00 \n",
"3881064 2014-05-14 00:01:28.030000+00:00 2021-01-16 13:44:14.134000+00:00 \n",
"7466062 2017-09-15 19:45:43.483000+00:00 2021-03-23 20:12:47.297000+00:00 \n",
"7517096 2020-02-22 15:49:52.386000+00:00 2021-03-12 15:13:44.065000+00:00 \n",
"8068275 2013-02-04 17:25:34.723000+00:00 2021-03-19 18:51:01.128000+00:00 \n",
"8184260 2019-05-07 19:27:02.210000+00:00 2020-12-10 23:39:15.236000+00:00 \n",
"8791256 2020-12-09 21:19:46.004000+00:00 2020-12-10 01:17:28.772000+00:00 \n",
"10174509 2014-09-18 12:34:14.242000+00:00 2020-02-11 14:31:25.544000+00:00 \n",
"10257808 2020-04-19 21:06:33.495000+00:00 2021-02-10 20:13:07.698000+00:00 \n",
"10486212 2016-08-24 11:00:30.403000+00:00 2021-03-23 07:16:22.582000+00:00 \n",
"10652632 2016-03-02 18:37:44.989000+00:00 2021-03-19 10:11:57.908000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"1025713 19 0 0 77 False NaN \n",
"2743648 1221 25 0 1742 True ise.pw.edu.pl \n",
"3011724 199 0 1 174 False NaN \n",
"3881064 37 0 0 21 False NaN \n",
"7466062 30 0 0 34 True hotmail.com \n",
"7517096 13 0 0 34 False usal.edu.ar \n",
"8068275 798 0 1 582 True NaN \n",
"8184260 2022 0 0 16 False NaN \n",
"8791256 0 0 0 0 False NaN \n",
"10174509 15 0 0 29 True NaN \n",
"10257808 0 0 0 7 False NaN \n",
"10486212 47 0 0 48 False NaN \n",
"10652632 88 0 0 193 True syekhnurjati.ac.id \n",
"\n",
" other_email_domains \\\n",
"1025713 NaN \n",
"2743648 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] \n",
"3011724 NaN \n",
"3881064 NaN \n",
"7466062 [gmail.com, gmail.com, hotmail.com, baldwin.ed... \n",
"7517096 NaN \n",
"8068275 NaN \n",
"8184260 NaN \n",
"8791256 NaN \n",
"10174509 NaN \n",
"10257808 NaN \n",
"10486212 NaN \n",
"10652632 [gmail.com] \n",
"\n",
" url_domains n_emails n_urls \\\n",
"1025713 [google.com, syekhnurjati.ac.id, orcid.org, bl... <NA> 59 \n",
"2743648 [google.pl, publons.com, scopus.com, mendeley.... 3 114 \n",
"3011724 [worldassessmentcouncil.org, spseke.sk, bcs.or... <NA> 114 \n",
"3881064 [youtube.com, isjaee.com, researchgate.net, re... <NA> 111 \n",
"7466062 [concytec.gob.pe, redalyc.org, redalyc.org, un... 5 61 \n",
"7517096 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... <NA> 61 \n",
"8068275 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... <NA> 61 \n",
"8184260 [researchgate.net, youtube.com, linkedin.com, ... <NA> 152 \n",
"8791256 [goowonderland.com, goowonderland.com, goowond... <NA> 81 \n",
"10174509 [biowebspin.com, biowebspin.com, google.com, l... <NA> 73 \n",
"10257808 [yumpu.com, ijopm.org, google.com, blogspot.co... <NA> 69 \n",
"10486212 [geethashishu.in, geethashishu.in, acm.org, go... <NA> 71 \n",
"10652632 [microsoft.com, twitter.com, academia.edu, aca... 1 67 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"1025713 <NA> 4 3 1 \n",
"2743648 3 5 1 1 \n",
"3011724 5 8 6 22 \n",
"3881064 2 16 2 7 \n",
"7466062 4 2 1 1 \n",
"7517096 2 11 6 5 \n",
"8068275 2 3 7 9 \n",
"8184260 1 <NA> 6 <NA> \n",
"8791256 <NA> 7 <NA> <NA> \n",
"10174509 1 4 4 2 \n",
"10257808 <NA> 1 8 6 \n",
"10486212 3 6 5 10 \n",
"10652632 4 7 2 1 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows whose url_domains list has more than 50 entries and whose n_works is positive.\n",
"df.loc[\n",
"    (df['url_domains'].str.len() > 50)\n",
"    & (df['n_works'] > 0)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>47439</th>\n",
" <td>0000-0002-5967-2835</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>oleksiy</td>\n",
" <td>goryayinov</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[алексей николаевич горяинов, о.м.горяїнов, а....</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[diagnostics, transport, logistics]</td>\n",
" <td>[[researcherid, i-7977-2016]]</td>\n",
" <td>[[, дистанционный курс «ctl.sc2x: supply chain...</td>\n",
" <td>[[docent, kharkiv petro vasylenko national tec...</td>\n",
" <td>274</td>\n",
" <td>[oleksiy goryayinov]</td>\n",
" <td>2014-08-03 18:06:42.925000+00:00</td>\n",
" <td>2021-03-22 13:56:48.311000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[khntusg.com.ua, khntusg.com.ua, google.com.ua...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>14</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>72557</th>\n",
" <td>0000-0002-3505-2797</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>[nurul malahayati]</td>\n",
" <td>2017-10-01 00:46:31.324000+00:00</td>\n",
" <td>2019-08-19 15:52:47.253000+00:00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>94081</th>\n",
" <td>0000-0003-3670-9620</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[engineering, gearturbine, technology, power, ...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[carlos barrera]</td>\n",
" <td>2016-08-29 20:32:10.362000+00:00</td>\n",
" <td>2021-02-09 04:56:35.554000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>261673</th>\n",
" <td>0000-0002-5441-0465</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[icts, human resources, psychology of organiza...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>[nuria hernández-león]</td>\n",
" <td>2015-11-28 07:18:58.442000+00:00</td>\n",
" <td>2021-03-05 16:37:47.403000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>7</td>\n",
" <td>19</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>326211</th>\n",
" <td>0000-0002-7781-6767</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>mohd nazri</td>\n",
" <td>ismail</td>\n",
" <td>born in penang, malaysia in 1971, dr. mohd had...</td>\n",
" <td>[ndum (national defence university of malaysia)]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[wsn, design of network ip address, network ma...</td>\n",
" <td>[[scopus author id, 24372977800], [researcheri...</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, universiti pertahanan nasional mal...</td>\n",
" <td>35</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2016-09-06 02:25:52.974000+00:00</td>\n",
" <td>2020-10-20 06:55:55.051000+00:00</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com.my, researchgate.net, academia.edu...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" <td>10</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10579801</th>\n",
" <td>0000-0001-5087-6965</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[evolutionary biology, education, new england ...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>[robert j. ohara]</td>\n",
" <td>2014-09-21 02:45:19.620000+00:00</td>\n",
" <td>2020-07-09 06:51:09.228000+00:00</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10590882</th>\n",
" <td>0000-0002-3318-9861</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>shagufta</td>\n",
" <td>perveen</td>\n",
" <td>prof. dr. shagufta perveen is a professor at k...</td>\n",
" <td>NaN</td>\n",
" <td>shagufta792000@yahoo.com</td>\n",
" <td>[shagufta perveen msu, shagufta perveen uk, sh...</td>\n",
" <td>NaN</td>\n",
" <td>[[hej research institute of chemistry, phd che...</td>\n",
" <td>[[professor, king saud university college of p...</td>\n",
" <td>66</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2015-12-21 10:34:06.771000+00:00</td>\n",
" <td>2021-02-22 14:58:30.893000+00:00</td>\n",
" <td>56</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>66</td>\n",
" <td>True</td>\n",
" <td>yahoo.com</td>\n",
" <td>[msu.edu, ksu.edu.sa]</td>\n",
" <td>[shaguftaperveen.com, researchgate.net, ksu.ed...</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10766062</th>\n",
" <td>0000-0001-8960-9004</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>susan</td>\n",
" <td>bastani</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[s. bastani, سوسن باستانی]</td>\n",
" <td>sbastani@alzahra.ac.ir</td>\n",
" <td>[fuzzy logic, personal networks, social networ...</td>\n",
" <td>[[scopus author id, 16642098400]]</td>\n",
" <td>[[sociology, ph.d., university of toronto, tor...</td>\n",
" <td>[[professor, alzahra university, tehran, vanak...</td>\n",
" <td>20</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2019-07-10 06:50:46.255000+00:00</td>\n",
" <td>2020-10-07 04:08:01.961000+00:00</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>True</td>\n",
" <td>alzahra.ac.ir</td>\n",
" <td>[gmail.com, gmail.com]</td>\n",
" <td>[scopus.com, google.com, publons.com, zenodo.o...</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10807839</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[applied economics, financial economics, devel...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>[caroline wanjiru kariuki]</td>\n",
" <td>2020-03-18 10:18:04.007000+00:00</td>\n",
" <td>2021-02-11 14:40:38.515000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10911966</th>\n",
" <td>0000-0003-2311-0600</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>[myo kyaw hlaing]</td>\n",
" <td>2018-12-26 12:51:57.801000+00:00</td>\n",
" <td>2021-01-26 14:36:47.421000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>140 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"47439 0000-0002-5967-2835 True True \n",
"72557 0000-0002-3505-2797 True True \n",
"94081 0000-0003-3670-9620 True True \n",
"261673 0000-0002-5441-0465 True True \n",
"326211 0000-0002-7781-6767 True True \n",
"... ... ... ... \n",
"10579801 0000-0001-5087-6965 True True \n",
"10590882 0000-0002-3318-9861 True True \n",
"10766062 0000-0001-8960-9004 True True \n",
"10807839 0000-0002-4379-6454 True True \n",
"10911966 0000-0003-2311-0600 True True \n",
"\n",
" given_names family_name \\\n",
"47439 oleksiy goryayinov \n",
"72557 nurul malahayati \n",
"94081 carlos barrera \n",
"261673 nuria hernández-león \n",
"326211 mohd nazri ismail \n",
"... ... ... \n",
"10579801 robert ohara \n",
"10590882 shagufta perveen \n",
"10766062 susan bastani \n",
"10807839 caroline wanjiru kariuki \n",
"10911966 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"47439 <NA> \n",
"72557 google scholar \n",
"94081 im individual inventor, and this is my work; s... \n",
"261673 <NA> \n",
"326211 born in penang, malaysia in 1971, dr. mohd had... \n",
"... ... \n",
"10579801 systematics, evolutionary biology, and the his... \n",
"10590882 prof. dr. shagufta perveen is a professor at k... \n",
"10766062 <NA> \n",
"10807839 caroline holds a phd in economics from curtin ... \n",
"10911966 <NA> \n",
"\n",
" other_names \\\n",
"47439 [алексей николаевич горяинов, о.м.горяїнов, а.... \n",
"72557 NaN \n",
"94081 [retrodynamic, novelinflow] \n",
"261673 [nuria h. león, nuria hernández león, hernánde... \n",
"326211 [ndum (national defence university of malaysia)] \n",
"... ... \n",
"10579801 [r. ohara, r.j. ohara, robert ohara, robert... \n",
"10590882 NaN \n",
"10766062 [s. bastani, سوسن باستانی] \n",
"10807839 NaN \n",
"10911966 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"47439 <NA> \n",
"72557 <NA> \n",
"94081 <NA> \n",
"261673 <NA> \n",
"326211 <NA> \n",
"... ... \n",
"10579801 <NA> \n",
"10590882 shagufta792000@yahoo.com \n",
"10766062 sbastani@alzahra.ac.ir \n",
"10807839 <NA> \n",
"10911966 <NA> \n",
"\n",
" keywords \\\n",
"47439 [diagnostics, transport, logistics] \n",
"72557 NaN \n",
"94081 [engineering, gearturbine, technology, power, ... \n",
"261673 [icts, human resources, psychology of organiza... \n",
"326211 [wsn, design of network ip address, network ma... \n",
"... ... \n",
"10579801 [evolutionary biology, education, new england ... \n",
"10590882 [shagufta perveen msu, shagufta perveen uk, sh... \n",
"10766062 [fuzzy logic, personal networks, social networ... \n",
"10807839 [applied economics, financial economics, devel... \n",
"10911966 [economic geology] \n",
"\n",
" external_ids \\\n",
"47439 [[researcherid, i-7977-2016]] \n",
"72557 [[researcherid, q-3861-2017]] \n",
"94081 [[loop profile, 394457]] \n",
"261673 NaN \n",
"326211 [[scopus author id, 24372977800], [researcheri... \n",
"... ... \n",
"10579801 [[isni, 0000000138200102], [researcherid, b-47... \n",
"10590882 NaN \n",
"10766062 [[scopus author id, 16642098400]] \n",
"10807839 NaN \n",
"10911966 NaN \n",
"\n",
" education \\\n",
"47439 [[, дистанционный курс «ctl.sc2x: supply chain... \n",
"72557 [[civil and transportation engineering , maste... \n",
"94081 NaN \n",
"261673 [[, course: social skills, university of salam... \n",
"326211 NaN \n",
"... ... \n",
"10579801 [[biology, ph.d., harvard university, cambridg... \n",
"10590882 [[hej research institute of chemistry, phd che... \n",
"10766062 [[sociology, ph.d., university of toronto, tor... \n",
"10807839 [[economics, doctor of philosophy , curtin uni... \n",
"10911966 NaN \n",
"\n",
" employment n_works \\\n",
"47439 [[docent, kharkiv petro vasylenko national tec... 274 \n",
"72557 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"94081 NaN 1 \n",
"261673 [[merchandise reception and expedition trainer... 11 \n",
"326211 [[lecturer, universiti pertahanan nasional mal... 35 \n",
"... ... ... \n",
"10579801 NaN 45 \n",
"10590882 [[professor, king saud university college of p... 66 \n",
"10766062 [[professor, alzahra university, tehran, vanak... 20 \n",
"10807839 [[director, educational development, strathmor... 4 \n",
"10911966 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"47439 [oleksiy goryayinov] 2014-08-03 18:06:42.925000+00:00 \n",
"72557 [nurul malahayati] 2017-10-01 00:46:31.324000+00:00 \n",
"94081 [carlos barrera] 2016-08-29 20:32:10.362000+00:00 \n",
"261673 [nuria hernández-león] 2015-11-28 07:18:58.442000+00:00 \n",
"326211 [scopus - elsevier] 2016-09-06 02:25:52.974000+00:00 \n",
"... ... ... \n",
"10579801 [robert j. ohara] 2014-09-21 02:45:19.620000+00:00 \n",
"10590882 [scopus - elsevier] 2015-12-21 10:34:06.771000+00:00 \n",
"10766062 [scopus - elsevier] 2019-07-10 06:50:46.255000+00:00 \n",
"10807839 [caroline wanjiru kariuki] 2020-03-18 10:18:04.007000+00:00 \n",
"10911966 [myo kyaw hlaing] 2018-12-26 12:51:57.801000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc \\\n",
"47439 2021-03-22 13:56:48.311000+00:00 0 0 0 \n",
"72557 2019-08-19 15:52:47.253000+00:00 3 0 0 \n",
"94081 2021-02-09 04:56:35.554000+00:00 0 0 0 \n",
"261673 2021-03-05 16:37:47.403000+00:00 1 0 0 \n",
"326211 2020-10-20 06:55:55.051000+00:00 24 0 0 \n",
"... ... ... ... ... \n",
"10579801 2020-07-09 06:51:09.228000+00:00 23 0 0 \n",
"10590882 2021-02-22 14:58:30.893000+00:00 56 0 0 \n",
"10766062 2020-10-07 04:08:01.961000+00:00 19 0 0 \n",
"10807839 2021-02-11 14:40:38.515000+00:00 1 0 0 \n",
"10911966 2021-01-26 14:36:47.421000+00:00 1 0 0 \n",
"\n",
" n_other_pids label primary_email_domain other_email_domains \\\n",
"47439 0 False NaN NaN \n",
"72557 3 False NaN NaN \n",
"94081 0 False NaN NaN \n",
"261673 4 False NaN NaN \n",
"326211 35 True NaN NaN \n",
"... ... ... ... ... \n",
"10579801 72 True NaN NaN \n",
"10590882 66 True yahoo.com [msu.edu, ksu.edu.sa] \n",
"10766062 33 True alzahra.ac.ir [gmail.com, gmail.com] \n",
"10807839 0 False NaN NaN \n",
"10911966 2 False NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"47439 [khntusg.com.ua, khntusg.com.ua, google.com.ua... <NA> 13 \n",
"72557 [google.com, ristekdikti.go.id, unsyiah.ac.id,... <NA> 16 \n",
"94081 [blogspot.mx, behance.net, authorstream.com, d... <NA> 24 \n",
"261673 [feriaempresamujer.com, escueladenegociosydire... <NA> 16 \n",
"326211 [google.com.my, researchgate.net, academia.edu... <NA> 16 \n",
"... ... ... ... \n",
"10579801 [rjohara.net, google.com, collegiateway.org, r... <NA> 12 \n",
"10590882 [shaguftaperveen.com, researchgate.net, ksu.ed... 2 11 \n",
"10766062 [scopus.com, google.com, publons.com, zenodo.o... 2 11 \n",
"10807839 [scopus.com, mendeley.com, publons.com, resear... <NA> 13 \n",
"10911966 [facebook.com, linkedin.com, instagram.com, re... <NA> 12 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"47439 1 3 14 7 \n",
"72557 1 <NA> 2 1 \n",
"94081 1 8 <NA> <NA> \n",
"261673 <NA> 7 19 16 \n",
"326211 2 10 <NA> 4 \n",
"... ... ... ... ... \n",
"10579801 3 5 1 <NA> \n",
"10590882 <NA> 25 3 7 \n",
"10766062 1 4 3 4 \n",
"10807839 <NA> 4 3 6 \n",
"10911966 <NA> 1 <NA> 2 \n",
"\n",
"[140 rows x 30 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows with more than 10 url_domains entries, a positive n_works, and exactly one works_source entry.\n",
"df.loc[\n",
"    (df['url_domains'].str.len() > 10)\n",
"    & (df['n_works'] > 0)\n",
"    & (df['works_source'].str.len() == 1)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0002-5967-2835</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>oleksiy</td>\n",
" <td>goryayinov</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[алексей николаевич горяинов, о.м.горяїнов, а....</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[diagnostics, transport, logistics]</td>\n",
" <td>[[researcherid, i-7977-2016]]</td>\n",
" <td>[[, дистанционный курс «ctl.sc2x: supply chain...</td>\n",
" <td>[[docent, kharkiv petro vasylenko national tec...</td>\n",
" <td>274</td>\n",
" <td>oleksiy goryayinov</td>\n",
" <td>2014-08-03 18:06:42.925000+00:00</td>\n",
" <td>2021-03-22 13:56:48.311000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[khntusg.com.ua, khntusg.com.ua, google.com.ua...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>14</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0002-3505-2797</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>nurul malahayati</td>\n",
" <td>2017-10-01 00:46:31.324000+00:00</td>\n",
" <td>2019-08-19 15:52:47.253000+00:00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0003-3670-9620</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[engineering, gearturbine, technology, power, ...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>carlos barrera</td>\n",
" <td>2016-08-29 20:32:10.362000+00:00</td>\n",
" <td>2021-02-09 04:56:35.554000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0002-5441-0465</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[icts, human resources, psychology of organiza...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>nuria hernández-león</td>\n",
" <td>2015-11-28 07:18:58.442000+00:00</td>\n",
" <td>2021-03-05 16:37:47.403000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>7</td>\n",
" <td>19</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0002-7781-6767</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>mohd nazri</td>\n",
" <td>ismail</td>\n",
" <td>born in penang, malaysia in 1971, dr. mohd had...</td>\n",
" <td>[ndum (national defence university of malaysia)]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[wsn, design of network ip address, network ma...</td>\n",
" <td>[[scopus author id, 24372977800], [researcheri...</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, universiti pertahanan nasional mal...</td>\n",
" <td>35</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2016-09-06 02:25:52.974000+00:00</td>\n",
" <td>2020-10-20 06:55:55.051000+00:00</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com.my, researchgate.net, academia.edu...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>2</td>\n",
" <td>10</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>0000-0001-5087-6965</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[evolutionary biology, education, new england ...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>robert j. ohara</td>\n",
" <td>2014-09-21 02:45:19.620000+00:00</td>\n",
" <td>2020-07-09 06:51:09.228000+00:00</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>0000-0002-3318-9861</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>shagufta</td>\n",
" <td>perveen</td>\n",
" <td>prof. dr. shagufta perveen is a professor at k...</td>\n",
" <td>NaN</td>\n",
" <td>shagufta792000@yahoo.com</td>\n",
" <td>[shagufta perveen msu, shagufta perveen uk, sh...</td>\n",
" <td>NaN</td>\n",
" <td>[[hej research institute of chemistry, phd che...</td>\n",
" <td>[[professor, king saud university college of p...</td>\n",
" <td>66</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2015-12-21 10:34:06.771000+00:00</td>\n",
" <td>2021-02-22 14:58:30.893000+00:00</td>\n",
" <td>56</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>66</td>\n",
" <td>True</td>\n",
" <td>yahoo.com</td>\n",
" <td>[msu.edu, ksu.edu.sa]</td>\n",
" <td>[shaguftaperveen.com, researchgate.net, ksu.ed...</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>25</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>0000-0001-8960-9004</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>susan</td>\n",
" <td>bastani</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[s. bastani, سوسن باستانی]</td>\n",
" <td>sbastani@alzahra.ac.ir</td>\n",
" <td>[fuzzy logic, personal networks, social networ...</td>\n",
" <td>[[scopus author id, 16642098400]]</td>\n",
" <td>[[sociology, ph.d., university of toronto, tor...</td>\n",
" <td>[[professor, alzahra university, tehran, vanak...</td>\n",
" <td>20</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2019-07-10 06:50:46.255000+00:00</td>\n",
" <td>2020-10-07 04:08:01.961000+00:00</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>True</td>\n",
" <td>alzahra.ac.ir</td>\n",
" <td>[gmail.com, gmail.com]</td>\n",
" <td>[scopus.com, google.com, publons.com, zenodo.o...</td>\n",
" <td>2</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[applied economics, financial economics, devel...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>caroline wanjiru kariuki</td>\n",
" <td>2020-03-18 10:18:04.007000+00:00</td>\n",
" <td>2021-02-11 14:40:38.515000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>0000-0003-2311-0600</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>myo kyaw hlaing</td>\n",
" <td>2018-12-26 12:51:57.801000+00:00</td>\n",
" <td>2021-01-26 14:36:47.421000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>140 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0 0000-0002-5967-2835 True True \n",
"1 0000-0002-3505-2797 True True \n",
"2 0000-0003-3670-9620 True True \n",
"3 0000-0002-5441-0465 True True \n",
"4 0000-0002-7781-6767 True True \n",
".. ... ... ... \n",
"135 0000-0001-5087-6965 True True \n",
"136 0000-0002-3318-9861 True True \n",
"137 0000-0001-8960-9004 True True \n",
"138 0000-0002-4379-6454 True True \n",
"139 0000-0003-2311-0600 True True \n",
"\n",
" given_names family_name \\\n",
"0 oleksiy goryayinov \n",
"1 nurul malahayati \n",
"2 carlos barrera \n",
"3 nuria hernández-león \n",
"4 mohd nazri ismail \n",
".. ... ... \n",
"135 robert ohara \n",
"136 shagufta perveen \n",
"137 susan bastani \n",
"138 caroline wanjiru kariuki \n",
"139 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"0 <NA> \n",
"1 google scholar \n",
"2 im individual inventor, and this is my work; s... \n",
"3 <NA> \n",
"4 born in penang, malaysia in 1971, dr. mohd had... \n",
".. ... \n",
"135 systematics, evolutionary biology, and the his... \n",
"136 prof. dr. shagufta perveen is a professor at k... \n",
"137 <NA> \n",
"138 caroline holds a phd in economics from curtin ... \n",
"139 <NA> \n",
"\n",
" other_names \\\n",
"0 [алексей николаевич горяинов, о.м.горяїнов, а.... \n",
"1 NaN \n",
"2 [retrodynamic, novelinflow] \n",
"3 [nuria h. león, nuria hernández león, hernánde... \n",
"4 [ndum (national defence university of malaysia)] \n",
".. ... \n",
"135 [r. ohara, r.j. ohara, robert ohara, robert... \n",
"136 NaN \n",
"137 [s. bastani, سوسن باستانی] \n",
"138 NaN \n",
"139 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"0 <NA> \n",
"1 <NA> \n",
"2 <NA> \n",
"3 <NA> \n",
"4 <NA> \n",
".. ... \n",
"135 <NA> \n",
"136 shagufta792000@yahoo.com \n",
"137 sbastani@alzahra.ac.ir \n",
"138 <NA> \n",
"139 <NA> \n",
"\n",
" keywords \\\n",
"0 [diagnostics, transport, logistics] \n",
"1 NaN \n",
"2 [engineering, gearturbine, technology, power, ... \n",
"3 [icts, human resources, psychology of organiza... \n",
"4 [wsn, design of network ip address, network ma... \n",
".. ... \n",
"135 [evolutionary biology, education, new england ... \n",
"136 [shagufta perveen msu, shagufta perveen uk, sh... \n",
"137 [fuzzy logic, personal networks, social networ... \n",
"138 [applied economics, financial economics, devel... \n",
"139 [economic geology] \n",
"\n",
" external_ids \\\n",
"0 [[researcherid, i-7977-2016]] \n",
"1 [[researcherid, q-3861-2017]] \n",
"2 [[loop profile, 394457]] \n",
"3 NaN \n",
"4 [[scopus author id, 24372977800], [researcheri... \n",
".. ... \n",
"135 [[isni, 0000000138200102], [researcherid, b-47... \n",
"136 NaN \n",
"137 [[scopus author id, 16642098400]] \n",
"138 NaN \n",
"139 NaN \n",
"\n",
" education \\\n",
"0 [[, дистанционный курс «ctl.sc2x: supply chain... \n",
"1 [[civil and transportation engineering , maste... \n",
"2 NaN \n",
"3 [[, course: social skills, university of salam... \n",
"4 NaN \n",
".. ... \n",
"135 [[biology, ph.d., harvard university, cambridg... \n",
"136 [[hej research institute of chemistry, phd che... \n",
"137 [[sociology, ph.d., university of toronto, tor... \n",
"138 [[economics, doctor of philosophy , curtin uni... \n",
"139 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[docent, kharkiv petro vasylenko national tec... 274 \n",
"1 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"2 NaN 1 \n",
"3 [[merchandise reception and expedition trainer... 11 \n",
"4 [[lecturer, universiti pertahanan nasional mal... 35 \n",
".. ... ... \n",
"135 NaN 45 \n",
"136 [[professor, king saud university college of p... 66 \n",
"137 [[professor, alzahra university, tehran, vanak... 20 \n",
"138 [[director, educational development, strathmor... 4 \n",
"139 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"0 oleksiy goryayinov 2014-08-03 18:06:42.925000+00:00 \n",
"1 nurul malahayati 2017-10-01 00:46:31.324000+00:00 \n",
"2 carlos barrera 2016-08-29 20:32:10.362000+00:00 \n",
"3 nuria hernández-león 2015-11-28 07:18:58.442000+00:00 \n",
"4 scopus - elsevier 2016-09-06 02:25:52.974000+00:00 \n",
".. ... ... \n",
"135 robert j. ohara 2014-09-21 02:45:19.620000+00:00 \n",
"136 scopus - elsevier 2015-12-21 10:34:06.771000+00:00 \n",
"137 scopus - elsevier 2019-07-10 06:50:46.255000+00:00 \n",
"138 caroline wanjiru kariuki 2020-03-18 10:18:04.007000+00:00 \n",
"139 myo kyaw hlaing 2018-12-26 12:51:57.801000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"0 2021-03-22 13:56:48.311000+00:00 0 0 0 0 \n",
"1 2019-08-19 15:52:47.253000+00:00 3 0 0 3 \n",
"2 2021-02-09 04:56:35.554000+00:00 0 0 0 0 \n",
"3 2021-03-05 16:37:47.403000+00:00 1 0 0 4 \n",
"4 2020-10-20 06:55:55.051000+00:00 24 0 0 35 \n",
".. ... ... ... ... ... \n",
"135 2020-07-09 06:51:09.228000+00:00 23 0 0 72 \n",
"136 2021-02-22 14:58:30.893000+00:00 56 0 0 66 \n",
"137 2020-10-07 04:08:01.961000+00:00 19 0 0 33 \n",
"138 2021-02-11 14:40:38.515000+00:00 1 0 0 0 \n",
"139 2021-01-26 14:36:47.421000+00:00 1 0 0 2 \n",
"\n",
" label primary_email_domain other_email_domains \\\n",
"0 False NaN NaN \n",
"1 False NaN NaN \n",
"2 False NaN NaN \n",
"3 False NaN NaN \n",
"4 True NaN NaN \n",
".. ... ... ... \n",
"135 True NaN NaN \n",
"136 True yahoo.com [msu.edu, ksu.edu.sa] \n",
"137 True alzahra.ac.ir [gmail.com, gmail.com] \n",
"138 False NaN NaN \n",
"139 False NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"0 [khntusg.com.ua, khntusg.com.ua, google.com.ua... <NA> 13 \n",
"1 [google.com, ristekdikti.go.id, unsyiah.ac.id,... <NA> 16 \n",
"2 [blogspot.mx, behance.net, authorstream.com, d... <NA> 24 \n",
"3 [feriaempresamujer.com, escueladenegociosydire... <NA> 16 \n",
"4 [google.com.my, researchgate.net, academia.edu... <NA> 16 \n",
".. ... ... ... \n",
"135 [rjohara.net, google.com, collegiateway.org, r... <NA> 12 \n",
"136 [shaguftaperveen.com, researchgate.net, ksu.ed... 2 11 \n",
"137 [scopus.com, google.com, publons.com, zenodo.o... 2 11 \n",
"138 [scopus.com, mendeley.com, publons.com, resear... <NA> 13 \n",
"139 [facebook.com, linkedin.com, instagram.com, re... <NA> 12 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"0 1 3 14 7 \n",
"1 1 <NA> 2 1 \n",
"2 1 8 <NA> <NA> \n",
"3 <NA> 7 19 16 \n",
"4 2 10 <NA> 4 \n",
".. ... ... ... ... \n",
"135 3 5 1 <NA> \n",
"136 <NA> 25 3 7 \n",
"137 1 4 3 4 \n",
"138 <NA> 4 3 6 \n",
"139 <NA> 1 <NA> 2 \n",
"\n",
"[140 rows x 30 columns]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploded_sources = df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)].explode('works_source').reset_index(drop=True)\n",
"exploded_sources"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0002-5967-2835</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>oleksiy</td>\n",
" <td>goryayinov</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[алексей николаевич горяинов, о.м.горяїнов, а....</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[diagnostics, transport, logistics]</td>\n",
" <td>[[researcherid, i-7977-2016]]</td>\n",
" <td>[[, дистанционный курс «ctl.sc2x: supply chain...</td>\n",
" <td>[[docent, kharkiv petro vasylenko national tec...</td>\n",
" <td>274</td>\n",
" <td>oleksiy goryayinov</td>\n",
" <td>2014-08-03 18:06:42.925000+00:00</td>\n",
" <td>2021-03-22 13:56:48.311000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[khntusg.com.ua, khntusg.com.ua, google.com.ua...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>14</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0002-3505-2797</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>nurul malahayati</td>\n",
" <td>2017-10-01 00:46:31.324000+00:00</td>\n",
" <td>2019-08-19 15:52:47.253000+00:00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0003-3670-9620</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[engineering, gearturbine, technology, power, ...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>carlos barrera</td>\n",
" <td>2016-08-29 20:32:10.362000+00:00</td>\n",
" <td>2021-02-09 04:56:35.554000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>24</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0002-5441-0465</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[icts, human resources, psychology of organiza...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>nuria hernández-león</td>\n",
" <td>2015-11-28 07:18:58.442000+00:00</td>\n",
" <td>2021-03-05 16:37:47.403000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>16</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>7</td>\n",
" <td>19</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0000-0001-7010-2908</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>clara</td>\n",
" <td>sarmento</td>\n",
" <td>clara sarmento holds an aggregation in cultura...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[ethnography, portuguese culture and literatur...</td>\n",
" <td>[[ciência id, d418-d6f8-7d49]]</td>\n",
" <td>[[ao abrigo da bolsa santander ie best practic...</td>\n",
" <td>[[presidente da comissão de acreditação do nov...</td>\n",
" <td>275</td>\n",
" <td>clara sarmento</td>\n",
" <td>2013-12-12 00:33:58.190000+00:00</td>\n",
" <td>2020-10-12 14:43:00.749000+00:00</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[iscap.pt, google.pt, academia.edu, researchga...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>8</td>\n",
" <td>37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>133</th>\n",
" <td>0000-0003-1020-1351</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>sheikh saifullah</td>\n",
" <td>ahmed</td>\n",
" <td>sheikh saifullah ahmed is a full-time lecturer...</td>\n",
" <td>NaN</td>\n",
" <td>saifullahahmedku@gmail.com</td>\n",
" <td>[post-truth, postmodern literature, critical t...</td>\n",
" <td>NaN</td>\n",
" <td>[[english discipline , ma &amp; ba in english , kh...</td>\n",
" <td>[[lecturer , international university of busin...</td>\n",
" <td>3</td>\n",
" <td>sheikh saifullah ahmed</td>\n",
" <td>2020-04-08 21:00:11.201000+00:00</td>\n",
" <td>2021-02-12 20:45:32.247000+00:00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>False</td>\n",
" <td>gmail.com</td>\n",
" <td>NaN</td>\n",
" <td>[academia.edu, iubat.edu, google.com, research...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>0000-0001-7228-5680</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>text</td>\n",
" <td>protocol</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[engineer, textprotocol.org, palo alto, ca, u...</td>\n",
" <td>1</td>\n",
" <td>text protocol</td>\n",
" <td>2021-03-09 10:30:32.237000+00:00</td>\n",
" <td>2021-03-21 17:17:40.500000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[about.me, figma.com, github.com, gitlab.com, ...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>15</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>135</th>\n",
" <td>0000-0001-5087-6965</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[evolutionary biology, education, new england ...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>robert j. ohara</td>\n",
" <td>2014-09-21 02:45:19.620000+00:00</td>\n",
" <td>2020-07-09 06:51:09.228000+00:00</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>0000-0002-4379-6454</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[applied economics, financial economics, devel...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>caroline wanjiru kariuki</td>\n",
" <td>2020-03-18 10:18:04.007000+00:00</td>\n",
" <td>2021-02-11 14:40:38.515000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>13</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>0000-0003-2311-0600</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>myo kyaw hlaing</td>\n",
" <td>2018-12-26 12:51:57.801000+00:00</td>\n",
" <td>2021-01-26 14:36:47.421000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>12</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>113 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0 0000-0002-5967-2835 True True \n",
"1 0000-0002-3505-2797 True True \n",
"2 0000-0003-3670-9620 True True \n",
"3 0000-0002-5441-0465 True True \n",
"5 0000-0001-7010-2908 True True \n",
".. ... ... ... \n",
"133 0000-0003-1020-1351 True True \n",
"134 0000-0001-7228-5680 True True \n",
"135 0000-0001-5087-6965 True True \n",
"138 0000-0002-4379-6454 True True \n",
"139 0000-0003-2311-0600 True True \n",
"\n",
" given_names family_name \\\n",
"0 oleksiy goryayinov \n",
"1 nurul malahayati \n",
"2 carlos barrera \n",
"3 nuria hernández-león \n",
"5 clara sarmento \n",
".. ... ... \n",
"133 sheikh saifullah ahmed \n",
"134 text protocol \n",
"135 robert ohara \n",
"138 caroline wanjiru kariuki \n",
"139 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"0 <NA> \n",
"1 google scholar \n",
"2 im individual inventor, and this is my work; s... \n",
"3 <NA> \n",
"5 clara sarmento holds an aggregation in cultura... \n",
".. ... \n",
"133 sheikh saifullah ahmed is a full-time lecturer... \n",
"134 <NA> \n",
"135 systematics, evolutionary biology, and the his... \n",
"138 caroline holds a phd in economics from curtin ... \n",
"139 <NA> \n",
"\n",
" other_names \\\n",
"0 [алексей николаевич горяинов, о.м.горяїнов, а.... \n",
"1 NaN \n",
"2 [retrodynamic, novelinflow] \n",
"3 [nuria h. león, nuria hernández león, hernánde... \n",
"5 NaN \n",
".. ... \n",
"133 NaN \n",
"134 NaN \n",
"135 [r. ohara, r.j. ohara, robert ohara, robert... \n",
"138 NaN \n",
"139 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"0 <NA> \n",
"1 <NA> \n",
"2 <NA> \n",
"3 <NA> \n",
"5 <NA> \n",
".. ... \n",
"133 saifullahahmedku@gmail.com \n",
"134 <NA> \n",
"135 <NA> \n",
"138 <NA> \n",
"139 <NA> \n",
"\n",
" keywords \\\n",
"0 [diagnostics, transport, logistics] \n",
"1 NaN \n",
"2 [engineering, gearturbine, technology, power, ... \n",
"3 [icts, human resources, psychology of organiza... \n",
"5 [ethnography, portuguese culture and literatur... \n",
".. ... \n",
"133 [post-truth, postmodern literature, critical t... \n",
"134 NaN \n",
"135 [evolutionary biology, education, new england ... \n",
"138 [applied economics, financial economics, devel... \n",
"139 [economic geology] \n",
"\n",
" external_ids \\\n",
"0 [[researcherid, i-7977-2016]] \n",
"1 [[researcherid, q-3861-2017]] \n",
"2 [[loop profile, 394457]] \n",
"3 NaN \n",
"5 [[ciência id, d418-d6f8-7d49]] \n",
".. ... \n",
"133 NaN \n",
"134 NaN \n",
"135 [[isni, 0000000138200102], [researcherid, b-47... \n",
"138 NaN \n",
"139 NaN \n",
"\n",
" education \\\n",
"0 [[, дистанционный курс «ctl.sc2x: supply chain... \n",
"1 [[civil and transportation engineering , maste... \n",
"2 NaN \n",
"3 [[, course: social skills, university of salam... \n",
"5 [[ao abrigo da bolsa santander ie best practic... \n",
".. ... \n",
"133 [[english discipline , ma & ba in english , kh... \n",
"134 NaN \n",
"135 [[biology, ph.d., harvard university, cambridg... \n",
"138 [[economics, doctor of philosophy , curtin uni... \n",
"139 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[docent, kharkiv petro vasylenko national tec... 274 \n",
"1 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"2 NaN 1 \n",
"3 [[merchandise reception and expedition trainer... 11 \n",
"5 [[presidente da comissão de acreditação do nov... 275 \n",
".. ... ... \n",
"133 [[lecturer , international university of busin... 3 \n",
"134 [[engineer, textprotocol.org, palo alto, ca, u... 1 \n",
"135 NaN 45 \n",
"138 [[director, educational development, strathmor... 4 \n",
"139 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"0 oleksiy goryayinov 2014-08-03 18:06:42.925000+00:00 \n",
"1 nurul malahayati 2017-10-01 00:46:31.324000+00:00 \n",
"2 carlos barrera 2016-08-29 20:32:10.362000+00:00 \n",
"3 nuria hernández-león 2015-11-28 07:18:58.442000+00:00 \n",
"5 clara sarmento 2013-12-12 00:33:58.190000+00:00 \n",
".. ... ... \n",
"133 sheikh saifullah ahmed 2020-04-08 21:00:11.201000+00:00 \n",
"134 text protocol 2021-03-09 10:30:32.237000+00:00 \n",
"135 robert j. ohara 2014-09-21 02:45:19.620000+00:00 \n",
"138 caroline wanjiru kariuki 2020-03-18 10:18:04.007000+00:00 \n",
"139 myo kyaw hlaing 2018-12-26 12:51:57.801000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"0 2021-03-22 13:56:48.311000+00:00 0 0 0 0 \n",
"1 2019-08-19 15:52:47.253000+00:00 3 0 0 3 \n",
"2 2021-02-09 04:56:35.554000+00:00 0 0 0 0 \n",
"3 2021-03-05 16:37:47.403000+00:00 1 0 0 4 \n",
"5 2020-10-12 14:43:00.749000+00:00 17 0 0 60 \n",
".. ... ... ... ... ... \n",
"133 2021-02-12 20:45:32.247000+00:00 2 0 0 3 \n",
"134 2021-03-21 17:17:40.500000+00:00 0 0 0 0 \n",
"135 2020-07-09 06:51:09.228000+00:00 23 0 0 72 \n",
"138 2021-02-11 14:40:38.515000+00:00 1 0 0 0 \n",
"139 2021-01-26 14:36:47.421000+00:00 1 0 0 2 \n",
"\n",
" label primary_email_domain other_email_domains \\\n",
"0 False NaN NaN \n",
"1 False NaN NaN \n",
"2 False NaN NaN \n",
"3 False NaN NaN \n",
"5 True NaN NaN \n",
".. ... ... ... \n",
"133 False gmail.com NaN \n",
"134 False NaN NaN \n",
"135 True NaN NaN \n",
"138 False NaN NaN \n",
"139 False NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"0 [khntusg.com.ua, khntusg.com.ua, google.com.ua... <NA> 13 \n",
"1 [google.com, ristekdikti.go.id, unsyiah.ac.id,... <NA> 16 \n",
"2 [blogspot.mx, behance.net, authorstream.com, d... <NA> 24 \n",
"3 [feriaempresamujer.com, escueladenegociosydire... <NA> 16 \n",
"5 [iscap.pt, google.pt, academia.edu, researchga... <NA> 13 \n",
".. ... ... ... \n",
"133 [academia.edu, iubat.edu, google.com, research... <NA> 12 \n",
"134 [about.me, figma.com, github.com, gitlab.com, ... <NA> 15 \n",
"135 [rjohara.net, google.com, collegiateway.org, r... <NA> 12 \n",
"138 [scopus.com, mendeley.com, publons.com, resear... <NA> 13 \n",
"139 [facebook.com, linkedin.com, instagram.com, re... <NA> 12 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"0 1 3 14 7 \n",
"1 1 <NA> 2 1 \n",
"2 1 8 <NA> <NA> \n",
"3 <NA> 7 19 16 \n",
"5 1 6 8 37 \n",
".. ... ... ... ... \n",
"133 <NA> 5 1 1 \n",
"134 <NA> <NA> <NA> 1 \n",
"135 3 5 1 <NA> \n",
"138 <NA> 4 3 6 \n",
"139 <NA> 1 <NA> 2 \n",
"\n",
"[113 rows x 30 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows of exploded_sources whose works_source string contains the row's given_names\n",
"# (case-sensitive substring match).\n",
"exploded_sources[exploded_sources.apply(lambda row: row['given_names'] in row['works_source'], axis=1)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Works source"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"def remove_self_source(lst, given, family):\n",
"    \"\"\"Return the entries of ``lst`` that mention neither name of the profile owner.\n",
"\n",
"    Matching is a case-insensitive substring test, so a short name can also\n",
"    hit an unrelated source name.\n",
"\n",
"    Args:\n",
"        lst: iterable of source-name strings.\n",
"        given: given name(s); a missing or blank value is ignored.\n",
"        family: family name; a missing or blank value is ignored.\n",
"\n",
"    Returns:\n",
"        A new list with the surviving sources in their original order.\n",
"    \"\"\"\n",
"    # Lowercase the names once. An empty needle is a substring of every string\n",
"    # and would discard all sources, so blank or missing names are skipped.\n",
"    needles = [name.lower() for name in (given, family) if pd.notna(name) and name.strip()]\n",
"    res = []\n",
"    for ws in lst:\n",
"        haystack = ws.lower()\n",
"        if not any(needle in haystack for needle in needles):\n",
"            res.append(ws)\n",
"    return res"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Only rows with a non-missing works_source and given_names are processed;\n",
"# every other row receives NaN through index alignment on assignment.\n",
"df['ext_works_source'] = (\n",
"    df.loc[df['works_source'].notna() & df['given_names'].notna()]\n",
"    .apply(\n",
"        lambda row: remove_self_source(row['works_source'], row['given_names'], row['family_name']),\n",
"        axis=1,\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Length of each ext_works_source entry, stored as nullable Int16 so missing entries stay <NA>.\n",
"df['n_ext_work_source'] = df['ext_works_source'].str.len().astype(pd.Int16Dtype())"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# One row per (orcid, external source) pair, restricted to rows with a non-empty ext_works_source.\n",
"exploded_external_sources = (\n",
"    df.loc[df['ext_works_source'].str.len() > 0, ['orcid', 'ext_works_source']]\n",
"    .explode('ext_works_source')\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Count rows per external source and rank the sources from most to least frequent.\n",
"grouped_ext_sources = (\n",
"    exploded_external_sources\n",
"    .groupby('ext_works_source')\n",
"    .count()\n",
"    .sort_values(by='orcid', ascending=False)\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"crossref",
"scopus - elsevier",
"crossref metadata search",
"multidisciplinary digital publishing institute",
"europe pubmed central",
"researcherid",
"publons",
"ciênciavitae",
"base - bielefeld academic search engine",
"datacite",
"redalyc",
"mla international bibliography",
"deutsche nationalbibliothek (dnb)",
"nasa astrophysics data system",
"national information processing institute ",
"f1000",
"inspire-hep",
"university of helsinki",
"hal",
"igi global",
"airiti",
"university of copenhagen",
"universidade federal de uberlândia",
"aarhus university",
"universidad del país vasco",
"university of manchester - pure",
"kings college london",
"university of southern denmark",
"wellcome open research",
"macquarie university"
],
"y": [
1460841,
902231,
297684,
281664,
181605,
158148,
39786,
32315,
20699,
16107,
9640,
8059,
7855,
7403,
6509,
5221,
4872,
4152,
4136,
3833,
3725,
3127,
2718,
2311,
2271,
2227,
2199,
2185,
2113,
2053
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 works_source"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"7aa828bc-7abe-40e2-825e-aefeec778204\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"7aa828bc-7abe-40e2-825e-aefeec778204\")) { Plotly.newPlot( \"7aa828bc-7abe-40e2-825e-aefeec778204\", [{\"type\": \"bar\", \"x\": [\"crossref\", \"scopus - elsevier\", \"crossref metadata search\", \"multidisciplinary digital publishing institute\", \"europe pubmed central\", \"researcherid\", \"publons\", \"ci\\u00eanciavitae\", \"base - bielefeld academic search engine\", \"datacite\", \"redalyc\", \"mla international bibliography\", \"deutsche nationalbibliothek (dnb)\", \"nasa astrophysics data system\", \"national information processing institute \", \"f1000\", \"inspire-hep\", \"university of helsinki\", \"hal\", \"igi global\", \"airiti\", \"university of copenhagen\", \"universidade federal de uberl\\u00e2ndia\", \"aarhus university\", \"universidad del pa\\u00eds vasco\", \"university of manchester - pure\", \"kings college london\", \"university of southern denmark\", \"wellcome open research\", \"macquarie university\"], \"y\": [1460841, 902231, 297684, 281664, 181605, 158148, 39786, 32315, 20699, 16107, 9640, 8059, 7855, 7403, 6509, 5221, 4872, 4152, 4136, 3833, 3725, 3127, 2718, 2311, 2271, 2227, 2199, 2185, 2113, 2053]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": 
\"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, 
\"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], 
\"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", 
\"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Top 30 works_source\"}, \"xaxis\": {\"range\": [-0.5, 29.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} 
).then(function(){\n",
" \n",
"var gd = document.getElementById('7aa828bc-7abe-40e2-825e-aefeec778204');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# set_top_n is defined elsewhere; presumably it updates TOP_N and TOP_RANGE (verify).\n",
"set_top_n(30)\n",
"\n",
"# grouped_ext_sources is sorted by descending count, so the first TOP_N rows are the most frequent sources.\n",
"data = [\n",
"    go.Bar(\n",
"        x=grouped_ext_sources['ext_works_source'].iloc[:TOP_N],\n",
"        y=grouped_ext_sources['orcid'].iloc[:TOP_N],\n",
"    )\n",
"]\n",
"layout = go.Layout(\n",
"    title='Top %s works_source' % TOP_N,\n",
"    xaxis={'tickangle': 45, 'tickfont': {'size': 12}, 'range': TOP_RANGE},\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ext_works_source</th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>crossref</td>\n",
" <td>1460841</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>scopus - elsevier</td>\n",
" <td>902231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>crossref metadata search</td>\n",
" <td>297684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>multidisciplinary digital publishing institute</td>\n",
" <td>281664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>europe pubmed central</td>\n",
" <td>181605</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>337</th>\n",
" <td>uta - oa journal global insight</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>338</th>\n",
" <td>francis crick institute</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>anna</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>340</th>\n",
" <td>santos</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>341</th>\n",
" <td>universitäts- und stadtbibliothek köln</td>\n",
" <td>3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>342 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" ext_works_source orcid\n",
"0 crossref 1460841\n",
"1 scopus - elsevier 902231\n",
"2 crossref metadata search 297684\n",
"3 multidisciplinary digital publishing institute 281664\n",
"4 europe pubmed central 181605\n",
".. ... ...\n",
"337 uta - oa journal global insight 3\n",
"338 francis crick institute 3\n",
"339 anna 3\n",
"340 santos 3\n",
"341 universitäts- und stadtbibliothek köln 3\n",
"\n",
"[342 rows x 2 columns]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# A source counts as authoritative when its row count in grouped_ext_sources exceeds 2.\n",
"authoritative_sources = grouped_ext_sources.loc[grouped_ext_sources['orcid'].gt(2)]\n",
"authoritative_sources"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"# Flag each (orcid, source) row whose source appears among the authoritative sources.\n",
"exploded_external_sources['authoritative'] = (\n",
"    exploded_external_sources['ext_works_source']\n",
"    .isin(authoritative_sources['ext_works_source'])\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"# An ORCID is authoritative as soon as one of its external sources is.\n",
"# reset_index() on the grouped Series already yields the two columns 'orcid' and 'authoritative'.\n",
"orcid_authoritative_source = (\n",
"    exploded_external_sources\n",
"    .groupby('orcid')['authoritative']\n",
"    .any()\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# Drop any 'authoritative' column left by a previous run, so re-executing this cell\n",
"# does not produce 'authoritative_x' / 'authoritative_y' and break the cells that follow.\n",
"# ORCIDs absent from orcid_authoritative_source get NaN from the left join.\n",
"df = (\n",
"    df.drop(columns='authoritative', errors='ignore')\n",
"    .merge(orcid_authoritative_source, on='orcid', how='left')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Rows left without a value for 'authoritative' are marked False.\n",
"df.loc[df['authoritative'].isna(), 'authoritative'] = False"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-6097-3953</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-03-02 09:29:16.528000+00:00</td>\n",
" <td>2018-03-02 09:43:07.551000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-6112-5550</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[v.i. yurtaev; v. yurtaev]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[professor, peoples friendship university of ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-04-03 07:50:23.358000+00:00</td>\n",
" <td>2020-03-18 09:42:44.753000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-6152-2695</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-12-11 15:31:56.388000+00:00</td>\n",
" <td>2020-01-28 15:34:17.309000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-6220-5683</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, new york university abu ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-08-18 12:36:45.307000+00:00</td>\n",
" <td>2020-09-23 13:37:54.180000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-03-10 13:22:01.966000+00:00</td>\n",
" <td>2016-06-14 22:17:54.470000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email given_names \\\n",
"0 0000-0001-6097-3953 False False <NA> \n",
"1 0000-0001-6112-5550 True True <NA> \n",
"2 0000-0001-6152-2695 True True <NA> \n",
"3 0000-0001-6220-5683 True True <NA> \n",
"4 0000-0001-7071-8294 True True <NA> \n",
"\n",
" family_name biography other_names primary_email keywords \\\n",
"0 <NA> <NA> NaN <NA> NaN \n",
"1 <NA> <NA> [v.i. yurtaev; v. yurtaev] <NA> NaN \n",
"2 <NA> <NA> NaN <NA> NaN \n",
"3 <NA> <NA> NaN <NA> NaN \n",
"4 <NA> <NA> NaN <NA> NaN \n",
"\n",
" external_ids education employment \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN [[professor, peoples friendship university of ... \n",
"2 NaN NaN NaN \n",
"3 NaN NaN [[research scientist, new york university abu ... \n",
"4 NaN NaN [[researcher (academic), universidad de zarago... \n",
"\n",
" n_works works_source activation_date \\\n",
"0 0 NaN 2018-03-02 09:29:16.528000+00:00 \n",
"1 0 NaN 2018-04-03 07:50:23.358000+00:00 \n",
"2 0 NaN 2019-12-11 15:31:56.388000+00:00 \n",
"3 0 NaN 2015-08-18 12:36:45.307000+00:00 \n",
"4 0 NaN 2014-03-10 13:22:01.966000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"0 2018-03-02 09:43:07.551000+00:00 0 0 0 0 \n",
"1 2020-03-18 09:42:44.753000+00:00 0 0 0 0 \n",
"2 2020-01-28 15:34:17.309000+00:00 0 0 0 0 \n",
"3 2020-09-23 13:37:54.180000+00:00 0 0 0 0 \n",
"4 2016-06-14 22:17:54.470000+00:00 0 0 0 0 \n",
"\n",
" label primary_email_domain other_email_domains url_domains n_emails \\\n",
"0 False NaN NaN NaN <NA> \n",
"1 False NaN NaN NaN <NA> \n",
"2 False NaN NaN NaN <NA> \n",
"3 False NaN NaN NaN <NA> \n",
"4 False NaN NaN NaN <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment ext_works_source \\\n",
"0 <NA> <NA> <NA> <NA> <NA> NaN \n",
"1 <NA> <NA> <NA> <NA> 1 NaN \n",
"2 <NA> <NA> <NA> <NA> <NA> NaN \n",
"3 <NA> <NA> <NA> <NA> 1 NaN \n",
"4 <NA> <NA> <NA> <NA> 2 NaN \n",
"\n",
" n_ext_work_source authoritative \n",
"0 <NA> False \n",
"1 <NA> False \n",
"2 <NA> False \n",
"3 <NA> False \n",
"4 <NA> False "
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## External IDs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"External IDs should come from reliable sources: ORCID registrants cannot add them freely."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 1.308598e+06\n",
"mean 1.359082e+00\n",
"std 6.643235e-01\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 8.000000e+01\n",
"Name: n_ids, dtype: float64"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the per-profile external ID count\n",
"df['n_ids'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3896226</th>\n",
" <td>0000-0002-9554-6633</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>john a</td>\n",
" <td>williams</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[scopus author id,  55553733518], [scopus aut...</td>\n",
" <td>NaN</td>\n",
" <td>[[, aston university, birmingham, , gb, 1722, ...</td>\n",
" <td>92</td>\n",
" <td>[aston research explorer]</td>\n",
" <td>2014-11-20 09:42:10.690000+00:00</td>\n",
" <td>2021-03-17 01:00:51.203000+00:00</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>208</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[aston.ac.uk]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>80</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>[aston research explorer]</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"3896226 0000-0002-9554-6633 True True \n",
"\n",
" given_names family_name biography other_names primary_email keywords \\\n",
"3896226 john a williams <NA> NaN <NA> NaN \n",
"\n",
" external_ids education \\\n",
"3896226 [[scopus author id,  55553733518], [scopus aut... NaN \n",
"\n",
" employment n_works \\\n",
"3896226 [[, aston university, birmingham, , gb, 1722, ... 92 \n",
"\n",
" works_source activation_date \\\n",
"3896226 [aston research explorer] 2014-11-20 09:42:10.690000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"3896226 2021-03-17 01:00:51.203000+00:00 80 0 0 208 \n",
"\n",
" label primary_email_domain other_email_domains url_domains \\\n",
"3896226 True NaN NaN [aston.ac.uk] \n",
"\n",
" n_emails n_urls n_ids n_keywords n_education n_employment \\\n",
"3896226 <NA> 1 80 <NA> <NA> 1 \n",
"\n",
" ext_works_source n_ext_work_source authoritative \n",
"3896226 [aston research explorer] 1 True "
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Row(s) whose external ID count equals the column maximum\n",
"df.loc[df['n_ids'] == df['n_ids'].max()]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"# Flatten external_ids so that each list element gets its own row next to its orcid\n",
"ids = (\n",
"    df.loc[:, ['orcid', 'external_ids']]\n",
"    .explode('external_ids')\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# The first element of every external ID entry is the provider label;\n",
"# rows without an external ID are left out and end up as NaN after index alignment\n",
"ids['provider'] = ids['external_ids'].dropna().map(lambda entry: entry[0])"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>external_ids</th>\n",
" <th>provider</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0000-0001-8315-2066</td>\n",
" <td>[researcherid, k-4630-2014]</td>\n",
" <td>researcherid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0000-0002-2638-4108</td>\n",
" <td>[scopus author id, 54394231000]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0000-0003-1435-6545</td>\n",
" <td>[researcherid, p-2223-2018]</td>\n",
" <td>researcherid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>0000-0003-2259-7023</td>\n",
" <td>[scopus author id, 57189297461]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>0000-0002-7397-5824</td>\n",
" <td>[scopus author id, 8399842800]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid external_ids provider\n",
"9 0000-0001-8315-2066 [researcherid, k-4630-2014] researcherid\n",
"29 0000-0002-2638-4108 [scopus author id, 54394231000] scopus author id\n",
"46 0000-0003-1435-6545 [researcherid, p-2223-2018] researcherid\n",
"50 0000-0003-2259-7023 [scopus author id, 57189297461] scopus author id\n",
"64 0000-0002-7397-5824 [scopus author id, 8399842800] scopus author id"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview only the rows that actually carry a provider label\n",
"ids.dropna(subset=['provider']).head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"# Count rows per provider label and rank the labels from most to least frequent\n",
"top_ids_providers = (\n",
"    ids.groupby('provider')\n",
"    .count()\n",
"    .sort_values(by='orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"scopus author id",
"researcherid",
"loop profile",
"ciência id",
"researcher name resolver id",
"sciprofile",
"中国科学家在线",
"isni",
"gnd",
"pitt id",
"technical university of denmark cwis",
"researcher id",
"id dialnet",
"digital author id",
"scopus author id: ",
"authenticusid",
"hku researcherpage",
"uow scholars",
"cti vitae",
"scopus author id:",
"hkust profile",
"chalmers id",
"scopus id",
"iauthor",
"google scholar",
"digital author id (dai)",
"authid",
"dai",
"us epa vivo",
"scopus id",
"authenticus",
"smithsonian profiles",
"github",
"escientist",
"vivo cornell",
"researcherid:",
"id dialnet:",
"dialnet id",
"sciprofiles",
"kaken",
"une researcher id",
"researcherid: ",
"orcid",
"scienceopen",
"profile system identifier",
"orcid id",
"custom"
],
"y": [
1037239,
545399,
118645,
37042,
7954,
5164,
4811,
3089,
2999,
2679,
2483,
1452,
1169,
1126,
1077,
878,
741,
646,
582,
547,
523,
430,
256,
212,
201,
180,
175,
155,
146,
127,
83,
61,
51,
49,
46,
39,
7,
6,
5,
5,
4,
3,
2,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "IDs provided by providers"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"12961d20-9716-4052-b8e3-3f537e67d613\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"12961d20-9716-4052-b8e3-3f537e67d613\")) { Plotly.newPlot( \"12961d20-9716-4052-b8e3-3f537e67d613\", [{\"type\": \"bar\", \"x\": [\"scopus author id\", \"researcherid\", \"loop profile\", \"ci\\u00eancia id\", \"researcher name resolver id\", \"sciprofile\", \"\\u4e2d\\u56fd\\u79d1\\u5b66\\u5bb6\\u5728\\u7ebf\", \"isni\", \"gnd\", \"pitt id\", \"technical university of denmark cwis\", \"researcher id\", \"id dialnet\", \"digital author id\", \"scopus author id: \", \"authenticusid\", \"hku researcherpage\", \"uow scholars\", \"cti vitae\", \"scopus author id:\", \"hkust profile\", \"chalmers id\", \"scopus id\", \"iauthor\", \"google scholar\", \"digital author id (dai)\", \"authid\", \"dai\", \"us epa vivo\", \"scopus id\", \"authenticus\", \"smithsonian profiles\", \"github\", \"escientist\", \"vivo cornell\", \"researcherid:\", \"id dialnet:\", \"dialnet id\", \"sciprofiles\", \"kaken\", \"une researcher id\", \"researcherid: \", \"orcid\", \"scienceopen\", \"profile system identifier\", \"orcid id\", \"custom\"], \"y\": [1037239, 545399, 118645, 37042, 7954, 5164, 4811, 3089, 2999, 2679, 2483, 1452, 1169, 1126, 1077, 878, 741, 646, 582, 547, 523, 430, 256, 212, 201, 180, 175, 155, 146, 127, 83, 61, 51, 49, 46, 39, 7, 6, 5, 5, 4, 3, 2, 1, 1, 1, 1]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], 
\"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", 
\"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"IDs provided by 
providers\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('12961d20-9716-4052-b8e3-3f537e67d613');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart with one bar per provider label; bar height is the count computed above\n",
"data = [go.Bar(x=top_ids_providers.index, y=top_ids_providers['orcid'])]\n",
"layout = go.Layout(\n",
"    title='IDs provided by providers',\n",
"    xaxis={'tickangle': 45, 'tickfont': {'size': 12}},\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'researcherid', 'scopus author id', 'loop profile', 'gnd',\n",
" 'ciência id', 'researcher name resolver id', 'pitt id',\n",
" 'id dialnet', 'isni', 'technical university of denmark cwis',\n",
" 'chalmers id', 'scopus author id: ', 'scopus author id:',\n",
" 'hkust profile', 'hku researcherpage', '中国科学家在线', 'uow scholars',\n",
" 'sciprofile', 'cti vitae', 'digital author id', 'researcher id',\n",
" 'authenticusid', 'authid', 'authenticus', 'scopus id',\n",
" 'digital author id (dai)', 'researcherid:', 'vivo cornell',\n",
" 'us epa vivo', 'escientist', 'github', 'iauthor', 'orcid id',\n",
" 'dai', 'scopus id', 'smithsonian profiles', 'google scholar',\n",
" 'kaken', 'dialnet id', 'researcherid: ', 'une researcher id',\n",
" 'sciprofiles', 'id dialnet:', 'scienceopen', 'orcid',\n",
" 'profile system identifier', 'custom'], dtype=object)"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct provider labels in order of first appearance (NaN marks rows without an external ID)\n",
"ids['provider'].unique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Keywords"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This field is problematic: users can cram several keywords into a single entry instead of adding them as separate keywords. Look at this:"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_keywords</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3751714</th>\n",
" <td>0000-0002-0673-0341</td>\n",
" <td>154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8697926</th>\n",
" <td>0000-0003-3343-5660</td>\n",
" <td>148</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1154523</th>\n",
" <td>0000-0002-6075-3501</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6512971</th>\n",
" <td>0000-0002-7060-4112</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1515197</th>\n",
" <td>0000-0001-5287-1949</td>\n",
" <td>132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989646</th>\n",
" <td>0000-0002-8783-5814</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989648</th>\n",
" <td>0000-0003-0529-3538</td>\n",
" <td>&lt;NA&gt;</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10989649 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_keywords\n",
"3751714 0000-0002-0673-0341 154\n",
"8697926 0000-0003-3343-5660 148\n",
"1154523 0000-0002-6075-3501 140\n",
"6512971 0000-0002-7060-4112 140\n",
"1515197 0000-0001-5287-1949 132\n",
"... ... ...\n",
"10989644 0000-0002-1686-1935 <NA>\n",
"10989645 0000-0002-3800-6331 <NA>\n",
"10989646 0000-0002-8783-5814 <NA>\n",
"10989647 0000-0002-7584-2283 <NA>\n",
"10989648 0000-0003-0529-3538 <NA>\n",
"\n",
"[10989649 rows x 2 columns]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rank profiles by how many keywords they declare, largest first.\n",
"# Rows without a keyword count (<NA>) are placed at the bottom by sort_values.\n",
"keywords_by_orcid = (\n",
"    df.loc[:, ['orcid', 'n_keywords']]\n",
"    .sort_values(by='n_keywords', ascending=False)\n",
")\n",
"keywords_by_orcid"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-0673-0341",
"0000-0003-3343-5660",
"0000-0002-6075-3501",
"0000-0002-7060-4112",
"0000-0001-5287-1949",
"0000-0002-9638-8091",
"0000-0002-4071-0301",
"0000-0001-9462-5666",
"0000-0002-0929-2412",
"0000-0002-0115-7195",
"0000-0002-4235-4259",
"0000-0003-0076-6287",
"0000-0001-9715-9357",
"0000-0002-1878-9762",
"0000-0001-6307-6027",
"0000-0003-2273-9888",
"0000-0002-0937-7061",
"0000-0002-1770-9660",
"0000-0001-5696-1052",
"0000-0003-2998-5520",
"0000-0003-1799-0971",
"0000-0002-0156-3580",
"0000-0002-9625-6742",
"0000-0003-1399-7156",
"0000-0001-9985-1697",
"0000-0001-6537-7683",
"0000-0002-8401-8018",
"0000-0003-4246-8579",
"0000-0001-7857-4133",
"0000-0002-7710-0355",
"0000-0001-5869-2204",
"0000-0002-8083-7382",
"0000-0001-8670-4372",
"0000-0001-7654-5013",
"0000-0002-4488-2880",
"0000-0003-4374-6374",
"0000-0001-6939-3859",
"0000-0003-2509-2549",
"0000-0002-3186-8860",
"0000-0002-0441-1507",
"0000-0001-5230-715X",
"0000-0003-0209-180X",
"0000-0001-9336-6850",
"0000-0002-0463-0048",
"0000-0001-5458-7167",
"0000-0002-9381-2264",
"0000-0002-8227-5387",
"0000-0002-3061-3364",
"0000-0002-9293-0189",
"0000-0002-3123-3021",
"0000-0003-1071-4296",
"0000-0003-3340-6413",
"0000-0003-3584-6834",
"0000-0002-8644-8396",
"0000-0002-2935-1934",
"0000-0002-1718-1632",
"0000-0002-8659-6321",
"0000-0002-8449-2211",
"0000-0003-1693-3190",
"0000-0001-5637-1124",
"0000-0001-5167-7466",
"0000-0002-3532-043X",
"0000-0001-6861-9561",
"0000-0003-4608-3844",
"0000-0003-4505-3678",
"0000-0003-4673-1063",
"0000-0001-8174-8835",
"0000-0002-6347-9464",
"0000-0002-8918-2781",
"0000-0003-4511-7942",
"0000-0003-2532-2906",
"0000-0001-9280-6017",
"0000-0002-5274-7742",
"0000-0001-9586-0780",
"0000-0003-3720-1183",
"0000-0001-5819-4555",
"0000-0002-1103-9651",
"0000-0001-8135-2304",
"0000-0002-8499-1045",
"0000-0003-2550-1859",
"0000-0002-8665-9281",
"0000-0001-7818-3212",
"0000-0003-1863-0265",
"0000-0001-8733-5230",
"0000-0003-2218-1343",
"0000-0002-5306-7781",
"0000-0001-7728-4046",
"0000-0003-4486-2684",
"0000-0002-4982-5236",
"0000-0001-5300-3932",
"0000-0003-3342-6123",
"0000-0002-8072-1152",
"0000-0002-3494-2624",
"0000-0002-0715-0461",
"0000-0002-3907-3552",
"0000-0001-5556-8275",
"0000-0002-3597-3350",
"0000-0002-2252-672X",
"0000-0001-7392-9361",
"0000-0001-8689-185X"
],
"y": [
154,
148,
140,
140,
132,
124,
115,
106,
105,
102,
100,
94,
92,
92,
88,
86,
78,
77,
75,
75,
72,
71,
70,
68,
68,
68,
67,
66,
64,
64,
63,
62,
61,
61,
61,
60,
60,
56,
55,
54,
54,
53,
53,
53,
53,
53,
52,
52,
52,
51,
51,
51,
50,
50,
50,
50,
50,
49,
49,
49,
49,
48,
48,
48,
48,
48,
47,
47,
47,
47,
46,
46,
46,
45,
45,
45,
45,
44,
44,
44,
44,
44,
44,
44,
44,
44,
43,
43,
43,
43,
43,
43,
43,
43,
42,
42,
42,
42,
42,
42
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Keywords provided by ORCiD"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"45557cd6-bf47-4a92-93fb-f079792fe91c\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"45557cd6-bf47-4a92-93fb-f079792fe91c\")) { Plotly.newPlot( \"45557cd6-bf47-4a92-93fb-f079792fe91c\", [{\"type\": \"bar\", \"x\": [\"0000-0002-0673-0341\", \"0000-0003-3343-5660\", \"0000-0002-6075-3501\", \"0000-0002-7060-4112\", \"0000-0001-5287-1949\", \"0000-0002-9638-8091\", \"0000-0002-4071-0301\", \"0000-0001-9462-5666\", \"0000-0002-0929-2412\", \"0000-0002-0115-7195\", \"0000-0002-4235-4259\", \"0000-0003-0076-6287\", \"0000-0001-9715-9357\", \"0000-0002-1878-9762\", \"0000-0001-6307-6027\", \"0000-0003-2273-9888\", \"0000-0002-0937-7061\", \"0000-0002-1770-9660\", \"0000-0001-5696-1052\", \"0000-0003-2998-5520\", \"0000-0003-1799-0971\", \"0000-0002-0156-3580\", \"0000-0002-9625-6742\", \"0000-0003-1399-7156\", \"0000-0001-9985-1697\", \"0000-0001-6537-7683\", \"0000-0002-8401-8018\", \"0000-0003-4246-8579\", \"0000-0001-7857-4133\", \"0000-0002-7710-0355\", \"0000-0001-5869-2204\", \"0000-0002-8083-7382\", \"0000-0001-8670-4372\", \"0000-0001-7654-5013\", \"0000-0002-4488-2880\", \"0000-0003-4374-6374\", \"0000-0001-6939-3859\", \"0000-0003-2509-2549\", \"0000-0002-3186-8860\", \"0000-0002-0441-1507\", \"0000-0001-5230-715X\", \"0000-0003-0209-180X\", \"0000-0001-9336-6850\", \"0000-0002-0463-0048\", \"0000-0001-5458-7167\", \"0000-0002-9381-2264\", \"0000-0002-8227-5387\", \"0000-0002-3061-3364\", \"0000-0002-9293-0189\", \"0000-0002-3123-3021\", \"0000-0003-1071-4296\", \"0000-0003-3340-6413\", \"0000-0003-3584-6834\", \"0000-0002-8644-8396\", \"0000-0002-2935-1934\", \"0000-0002-1718-1632\", \"0000-0002-8659-6321\", \"0000-0002-8449-2211\", \"0000-0003-1693-3190\", \"0000-0001-5637-1124\", \"0000-0001-5167-7466\", \"0000-0002-3532-043X\", \"0000-0001-6861-9561\", 
\"0000-0003-4608-3844\", \"0000-0003-4505-3678\", \"0000-0003-4673-1063\", \"0000-0001-8174-8835\", \"0000-0002-6347-9464\", \"0000-0002-8918-2781\", \"0000-0003-4511-7942\", \"0000-0003-2532-2906\", \"0000-0001-9280-6017\", \"0000-0002-5274-7742\", \"0000-0001-9586-0780\", \"0000-0003-3720-1183\", \"0000-0001-5819-4555\", \"0000-0002-1103-9651\", \"0000-0001-8135-2304\", \"0000-0002-8499-1045\", \"0000-0003-2550-1859\", \"0000-0002-8665-9281\", \"0000-0001-7818-3212\", \"0000-0003-1863-0265\", \"0000-0001-8733-5230\", \"0000-0003-2218-1343\", \"0000-0002-5306-7781\", \"0000-0001-7728-4046\", \"0000-0003-4486-2684\", \"0000-0002-4982-5236\", \"0000-0001-5300-3932\", \"0000-0003-3342-6123\", \"0000-0002-8072-1152\", \"0000-0002-3494-2624\", \"0000-0002-0715-0461\", \"0000-0002-3907-3552\", \"0000-0001-5556-8275\", \"0000-0002-3597-3350\", \"0000-0002-2252-672X\", \"0000-0001-7392-9361\", \"0000-0001-8689-185X\"], \"y\": [154, 148, 140, 140, 132, 124, 115, 106, 105, 102, 100, 94, 92, 92, 88, 86, 78, 77, 75, 75, 72, 71, 70, 68, 68, 68, 67, 66, 64, 64, 63, 62, 61, 61, 61, 60, 60, 56, 55, 54, 54, 53, 53, 53, 53, 53, 52, 52, 52, 51, 51, 51, 50, 50, 50, 50, 50, 49, 49, 49, 49, 48, 48, 48, 48, 48, 47, 47, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 44, 44, 44, 44, 44, 44, 44, 44, 43, 43, 43, 43, 43, 43, 43, 43, 42, 42, 42, 42, 42, 42]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, 
\"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], 
[0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": 
[[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": 
true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Keywords provided by ORCiD\"}, \"xaxis\": {\"range\": [-0.5, 99.5], \"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('45557cd6-bf47-4a92-93fb-f079792fe91c');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(100)\n",
"data = [\n",
" go.Bar(\n",
" x=keywords_by_orcid[:TOP_N]['orcid'],\n",
" y=keywords_by_orcid[:TOP_N]['n_keywords']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Keywords provided by ORCiD',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"top_keywords = df[['orcid', 'keywords']]\\\n",
" .explode('keywords')\\\n",
" .reset_index(drop=True)\\\n",
" .groupby('keywords')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"machine learning",
"bioinformatics",
"education",
"molecular biology",
"cancer",
"ecology",
"artificial intelligence",
"epidemiology",
"public health",
"microbiology",
"neuroscience",
"immunology",
"genetics",
"climate change",
"remote sensing",
"biochemistry",
"genomics",
"biotechnology",
"nanotechnology",
"sustainability",
"educación",
"gis",
"deep learning",
"psychology",
"computer vision",
"marketing",
"nutrition",
"innovation",
"data science",
"statistics",
"data mining",
"nanomaterials",
"image processing",
"robotics",
"management",
"optimization",
"renewable energy",
"chemistry",
"biomaterials",
"diabetes",
"gender",
"educação",
"architecture",
"catalysis",
"history",
"electrochemistry",
"evolution",
"research",
"energy",
"biodiversity"
],
"y": [
8574,
5424,
5191,
4557,
4163,
3923,
3839,
3789,
3676,
3550,
3495,
3468,
3343,
3337,
3279,
3003,
2794,
2681,
2674,
2654,
2526,
2511,
2466,
2381,
2309,
2213,
2199,
2154,
2153,
2144,
2108,
2100,
2099,
2086,
2081,
2071,
2009,
2005,
2002,
1998,
1997,
1873,
1835,
1813,
1813,
1800,
1797,
1789,
1770,
1717
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-50 keywords occurrence"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"29c72e99-f843-403e-8094-5151b381ebcc\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"29c72e99-f843-403e-8094-5151b381ebcc\")) { Plotly.newPlot( \"29c72e99-f843-403e-8094-5151b381ebcc\", [{\"type\": \"bar\", \"x\": [\"machine learning\", \"bioinformatics\", \"education\", \"molecular biology\", \"cancer\", \"ecology\", \"artificial intelligence\", \"epidemiology\", \"public health\", \"microbiology\", \"neuroscience\", \"immunology\", \"genetics\", \"climate change\", \"remote sensing\", \"biochemistry\", \"genomics\", \"biotechnology\", \"nanotechnology\", \"sustainability\", \"educaci\\u00f3n\", \"gis\", \"deep learning\", \"psychology\", \"computer vision\", \"marketing\", \"nutrition\", \"innovation\", \"data science\", \"statistics\", \"data mining\", \"nanomaterials\", \"image processing\", \"robotics\", \"management\", \"optimization\", \"renewable energy\", \"chemistry\", \"biomaterials\", \"diabetes\", \"gender\", \"educa\\u00e7\\u00e3o\", \"architecture\", \"catalysis\", \"history\", \"electrochemistry\", \"evolution\", \"research\", \"energy\", \"biodiversity\"], \"y\": [8574, 5424, 5191, 4557, 4163, 3923, 3839, 3789, 3676, 3550, 3495, 3468, 3343, 3337, 3279, 3003, 2794, 2681, 2674, 2654, 2526, 2511, 2466, 2381, 2309, 2213, 2199, 2154, 2153, 2144, 2108, 2100, 2099, 2086, 2081, 2071, 2009, 2005, 2002, 1998, 1997, 1873, 1835, 1813, 1813, 1800, 1797, 1789, 1770, 1717]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": 
\"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], 
[0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, 
\"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", 
\"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": 
{\"text\": \"Top-50 keywords occurrence\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('29c72e99-f843-403e-8094-5151b381ebcc');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(50)\n",
"data = [\n",
" go.Bar(\n",
" x=top_keywords[:TOP_N].index,\n",
" y=top_keywords[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top-%s keywords occurrence' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Education"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 2.441645e+06\n",
"mean 1.816169e+00\n",
"std 1.132196e+00\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 2.000000e+02\n",
"Name: n_education, dtype: float64"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.n_education.describe()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10536169</th>\n",
" <td>0000-0002-1927-0292</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>phd. carmen m</td>\n",
" <td>galvez-sánchez</td>\n",
" <td>my name is carmen maria galvez sánchez. i´m a ...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[gender-based violence, fibromyalgia, quantita...</td>\n",
" <td>[[loop profile, 509331], [scopus author id, 57...</td>\n",
" <td>[[psychology, 2019-2020 course. degree in psyc...</td>\n",
" <td>[[researcher and teaching staff. postdoctoral ...</td>\n",
" <td>35</td>\n",
" <td>[phd. carmen m galvez-sánchez, multidisciplina...</td>\n",
" <td>2016-04-18 14:28:57.237000+00:00</td>\n",
" <td>2021-03-06 14:17:33.246000+00:00</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>200</td>\n",
" <td>3</td>\n",
" <td>[multidisciplinary digital publishing institut...</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"10536169 0000-0002-1927-0292 True True \n",
"\n",
" given_names family_name \\\n",
"10536169 phd. carmen m galvez-sánchez \n",
"\n",
" biography other_names \\\n",
"10536169 my name is carmen maria galvez sánchez. i´m a ... NaN \n",
"\n",
" primary_email keywords \\\n",
"10536169 <NA> [gender-based violence, fibromyalgia, quantita... \n",
"\n",
" external_ids \\\n",
"10536169 [[loop profile, 509331], [scopus author id, 57... \n",
"\n",
" education \\\n",
"10536169 [[psychology, 2019-2020 course. degree in psyc... \n",
"\n",
" employment n_works \\\n",
"10536169 [[researcher and teaching staff. postdoctoral ... 35 \n",
"\n",
" works_source \\\n",
"10536169 [phd. carmen m galvez-sánchez, multidisciplina... \n",
"\n",
" activation_date last_update_date \\\n",
"10536169 2016-04-18 14:28:57.237000+00:00 2021-03-06 14:17:33.246000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"10536169 24 0 0 7 True NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids n_keywords \\\n",
"10536169 NaN NaN <NA> <NA> 2 5 \n",
"\n",
" n_education n_employment \\\n",
"10536169 200 3 \n",
"\n",
" ext_works_source \\\n",
"10536169 [multidisciplinary digital publishing institut... \n",
"\n",
" n_ext_work_source authoritative \n",
"10536169 4 True "
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.n_education == df.n_education.max()]"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>education</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0000-0002-2343-910X</td>\n",
" <td>[aeronautics and astronautics, phd, massachuse...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0000-0002-2343-910X</td>\n",
" <td>[aeronautics and astronautics, sm, massachuset...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0000-0002-2343-910X</td>\n",
" <td>[mechanical engineering and material science, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0000-0002-2638-4108</td>\n",
" <td>[public law, ph doctor, university of oviedo, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0000-0003-1435-6545</td>\n",
" <td>[morfologia, , universidade estadual paulista ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>[, , south china agricultural university, guan...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>[richard gilder graduate school, phd in compar...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>[geological sciences and history (dual major),...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>[school of electronics and information, master...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>[ department of electrical engineering, bachel...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4434439 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid \\\n",
"28 0000-0002-2343-910X \n",
"28 0000-0002-2343-910X \n",
"28 0000-0002-2343-910X \n",
"29 0000-0002-2638-4108 \n",
"46 0000-0003-1435-6545 \n",
"... ... \n",
"10989644 0000-0002-1686-1935 \n",
"10989645 0000-0002-3800-6331 \n",
"10989645 0000-0002-3800-6331 \n",
"10989647 0000-0002-7584-2283 \n",
"10989647 0000-0002-7584-2283 \n",
"\n",
" education \n",
"28 [aeronautics and astronautics, phd, massachuse... \n",
"28 [aeronautics and astronautics, sm, massachuset... \n",
"28 [mechanical engineering and material science, ... \n",
"29 [public law, ph doctor, university of oviedo, ... \n",
"46 [morfologia, , universidade estadual paulista ... \n",
"... ... \n",
"10989644 [, , south china agricultural university, guan... \n",
"10989645 [richard gilder graduate school, phd in compar... \n",
"10989645 [geological sciences and history (dual major),... \n",
"10989647 [school of electronics and information, master... \n",
"10989647 [ department of electrical engineering, bachel... \n",
"\n",
"[4434439 rows x 2 columns]"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploded_education = df[['orcid', 'education']].explode('education').dropna()\n",
"exploded_education"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"exploded_education[['degree', 'role', 'university', 'city', 'region', 'country', 'id', 'id_scheme']] = pd.DataFrame(exploded_education.education.tolist(), index=exploded_education.index)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"exploded_education.id.replace('', pd.NA, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-5000-0162</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-5000-0170</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-5000-0218</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-5000-0226</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-5000-0306</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2441640</th>\n",
" <td>0000-0003-4999-9719</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2441641</th>\n",
" <td>0000-0003-4999-9735</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2441642</th>\n",
" <td>0000-0003-4999-992X</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2441643</th>\n",
" <td>0000-0003-4999-9938</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2441644</th>\n",
" <td>0000-0003-4999-9954</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2441645 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid id\n",
"0 0000-0001-5000-0162 3\n",
"1 0000-0001-5000-0170 2\n",
"2 0000-0001-5000-0218 3\n",
"3 0000-0001-5000-0226 1\n",
"4 0000-0001-5000-0306 0\n",
"... ... ..\n",
"2441640 0000-0003-4999-9719 1\n",
"2441641 0000-0003-4999-9735 1\n",
"2441642 0000-0003-4999-992X 2\n",
"2441643 0000-0003-4999-9938 2\n",
"2441644 0000-0003-4999-9954 1\n",
"\n",
"[2441645 rows x 2 columns]"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploded_education.groupby('orcid').id.count().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"df = df.merge(exploded_education.groupby('orcid').id.count().reset_index(), on='orcid', how='left')\n",
"df.rename(columns={'id': 'n_valid_education'}, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" <th>n_valid_education</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>0000-0003-1435-6545</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[migration, culture cell, prostate cancer]</td>\n",
" <td>[[researcherid, p-2223-2018]]</td>\n",
" <td>[[morfologia, , universidade estadual paulista...</td>\n",
" <td>[[, universidade estadual paulista (unesp), in...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-08-09 12:12:24.405000+00:00</td>\n",
" <td>2020-04-22 01:38:03.184000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, linkedin.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>74</th>\n",
" <td>0000-0002-0427-9745</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>a. can</td>\n",
" <td>inci</td>\n",
" <td>i am a professor of finance at bryant universi...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, b-5471-2018], [scopus author i...</td>\n",
" <td>[[finance, ph.d., university of michigan - ros...</td>\n",
" <td>[[professor of finance, bryant university, smi...</td>\n",
" <td>34</td>\n",
" <td>[a. can inci]</td>\n",
" <td>2018-01-20 02:58:05.199000+00:00</td>\n",
" <td>2020-06-16 12:35:09.403000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>[]</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>0000-0002-3380-6671</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>abdul</td>\n",
" <td>asis pata</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[agribisnis, m.si, universitas hasanuddin, ma...</td>\n",
" <td>[[s.p, universitas muslim maros, maros, , id, ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-02-12 02:08:37.018000+00:00</td>\n",
" <td>2018-02-12 02:22:33.378000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>98</th>\n",
" <td>0000-0001-6902-6549</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>abubakar</td>\n",
" <td>muhammad</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[school of electrical and information enginee...</td>\n",
" <td>[[lecturer, university of faisalabad, faisalab...</td>\n",
" <td>1</td>\n",
" <td>[multidisciplinary digital publishing institute]</td>\n",
" <td>2017-07-06 10:29:17.738000+00:00</td>\n",
" <td>2020-08-01 05:18:53.393000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>[multidisciplinary digital publishing institute]</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>101</th>\n",
" <td>0000-0002-6142-6406</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>adam</td>\n",
" <td>mamadou</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[département deconomie sociologie rurale et t...</td>\n",
" <td>[[, institut national de la recherche agronomi...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-02-15 09:54:59.943000+00:00</td>\n",
" <td>2018-02-15 10:19:27.869000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989595</th>\n",
" <td>0000-0002-1842-4130</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>josé de jesús</td>\n",
" <td>cázares-marinero</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[josé cázares]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[chemistry, chemical biology, industrial chemi...</td>\n",
" <td>[[researcherid, h-2597-2013], [scopus author i...</td>\n",
" <td>[[charles friedel, postdoc, école nationale su...</td>\n",
" <td>[[mtc, polioles, mexico, , mx, , ], [head of r...</td>\n",
" <td>17</td>\n",
" <td>[crossref metadata search, scopus - elsevier, ...</td>\n",
" <td>2013-07-09 14:39:30.950000+00:00</td>\n",
" <td>2020-12-10 17:42:20.176000+00:00</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[linkedin.com, google.com, researchgate.net]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>[crossref metadata search, scopus - elsevier]</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989603</th>\n",
" <td>0000-0003-0459-4822</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>luana</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>mestranda em tecnologia na saúde e foi aluna o...</td>\n",
" <td>[luana bastos morey]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[tradução; língua espanhol; língua portuguesa;...</td>\n",
" <td>NaN</td>\n",
" <td>[[pós-graduação em tecnologia em saúde stricto...</td>\n",
" <td>[[professora de espanhol e português para estr...</td>\n",
" <td>7</td>\n",
" <td>[luana arrial bastos]</td>\n",
" <td>2017-05-11 13:14:59.372000+00:00</td>\n",
" <td>2020-12-08 20:18:24.163000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unidospelasaude.com.br, facebook.com, faceboo...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>[]</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989605</th>\n",
" <td>0000-0003-0057-1551</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>lyudmyla</td>\n",
" <td>antypenko</td>\n",
" <td>the phd degree of pharmacy was received under ...</td>\n",
" <td>[lyudmila nikolaevna antipenko (russian transl...</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[structure elucidation, organic synthesis, val...</td>\n",
" <td>[[scopus author id, 55070809900], [researcheri...</td>\n",
" <td>[[centre for nanomaterials, advanced technolog...</td>\n",
" <td>[[visiting scientist, north dakota state unive...</td>\n",
" <td>35</td>\n",
" <td>[crossref metadata search, scopus - elsevier, ...</td>\n",
" <td>2014-02-19 08:15:15.698000+00:00</td>\n",
" <td>2020-12-09 18:14:17.963000+00:00</td>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>11</td>\n",
" <td>17</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>[crossref metadata search, scopus - elsevier, ...</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989619</th>\n",
" <td>0000-0003-4653-4705</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>patricia</td>\n",
" <td>teixeira</td>\n",
" <td>2005 - phd, university of coimbrajuly 2009-jun...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[estua, heavy metals, steroid hormones, ecotox...</td>\n",
" <td>[[researcherid, i-6863-2013], [scopus author i...</td>\n",
" <td>[[, phd, university of coimbra, coimbra, , pt,...</td>\n",
" <td>[[senior researcher, university of coimbra, co...</td>\n",
" <td>95</td>\n",
" <td>[ciênciavitae, scopus - elsevier, pg cardoso, ...</td>\n",
" <td>2013-11-26 10:59:34.331000+00:00</td>\n",
" <td>2020-12-02 15:28:26.221000+00:00</td>\n",
" <td>90</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>42</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>[ciênciavitae, scopus - elsevier, pg cardoso, ...</td>\n",
" <td>4</td>\n",
" <td>True</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>youxia</td>\n",
" <td>wang</td>\n",
" <td>youxia wang (1995-), native of zunyi, guizhou ...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[institute of animal nutrition, master degree...</td>\n",
" <td>[[master, sichuan agricultural university , ch...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-11 02:11:51.808000+00:00</td>\n",
" <td>2020-12-11 03:25:28.263000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>641229 rows × 34 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"46 0000-0003-1435-6545 True True \n",
"74 0000-0002-0427-9745 True True \n",
"88 0000-0002-3380-6671 True True \n",
"98 0000-0001-6902-6549 True True \n",
"101 0000-0002-6142-6406 True True \n",
"... ... ... ... \n",
"10989595 0000-0002-1842-4130 True True \n",
"10989603 0000-0003-0459-4822 True True \n",
"10989605 0000-0003-0057-1551 True True \n",
"10989619 0000-0003-4653-4705 True True \n",
"10989644 0000-0002-1686-1935 True True \n",
"\n",
" given_names family_name \\\n",
"46 <NA> <NA> \n",
"74 a. can inci \n",
"88 abdul asis pata \n",
"98 abubakar muhammad \n",
"101 adam mamadou \n",
"... ... ... \n",
"10989595 josé de jesús cázares-marinero \n",
"10989603 luana <NA> \n",
"10989605 lyudmyla antypenko \n",
"10989619 patricia teixeira \n",
"10989644 youxia wang \n",
"\n",
" biography \\\n",
"46 <NA> \n",
"74 i am a professor of finance at bryant universi... \n",
"88 <NA> \n",
"98 <NA> \n",
"101 <NA> \n",
"... ... \n",
"10989595 <NA> \n",
"10989603 mestranda em tecnologia na saúde e foi aluna o... \n",
"10989605 the phd degree of pharmacy was received under ... \n",
"10989619 2005 - phd, university of coimbrajuly 2009-jun... \n",
"10989644 youxia wang (1995-), native of zunyi, guizhou ... \n",
"\n",
" other_names primary_email \\\n",
"46 NaN <NA> \n",
"74 NaN <NA> \n",
"88 NaN <NA> \n",
"98 NaN <NA> \n",
"101 NaN <NA> \n",
"... ... ... \n",
"10989595 [josé cázares] <NA> \n",
"10989603 [luana bastos morey] <NA> \n",
"10989605 [lyudmila nikolaevna antipenko (russian transl... <NA> \n",
"10989619 NaN <NA> \n",
"10989644 NaN <NA> \n",
"\n",
" keywords \\\n",
"46 [migration, culture cell, prostate cancer] \n",
"74 NaN \n",
"88 NaN \n",
"98 NaN \n",
"101 NaN \n",
"... ... \n",
"10989595 [chemistry, chemical biology, industrial chemi... \n",
"10989603 [tradução; língua espanhol; língua portuguesa;... \n",
"10989605 [structure elucidation, organic synthesis, val... \n",
"10989619 [estua, heavy metals, steroid hormones, ecotox... \n",
"10989644 NaN \n",
"\n",
" external_ids \\\n",
"46 [[researcherid, p-2223-2018]] \n",
"74 [[researcherid, b-5471-2018], [scopus author i... \n",
"88 NaN \n",
"98 NaN \n",
"101 NaN \n",
"... ... \n",
"10989595 [[researcherid, h-2597-2013], [scopus author i... \n",
"10989603 NaN \n",
"10989605 [[scopus author id, 55070809900], [researcheri... \n",
"10989619 [[researcherid, i-6863-2013], [scopus author i... \n",
"10989644 NaN \n",
"\n",
" education \\\n",
"46 [[morfologia, , universidade estadual paulista... \n",
"74 [[finance, ph.d., university of michigan - ros... \n",
"88 [[agribisnis, m.si, universitas hasanuddin, ma... \n",
"98 [[school of electrical and information enginee... \n",
"101 [[département deconomie sociologie rurale et t... \n",
"... ... \n",
"10989595 [[charles friedel, postdoc, école nationale su... \n",
"10989603 [[pós-graduação em tecnologia em saúde stricto... \n",
"10989605 [[centre for nanomaterials, advanced technolog... \n",
"10989619 [[, phd, university of coimbra, coimbra, , pt,... \n",
"10989644 [[institute of animal nutrition, master degree... \n",
"\n",
" employment n_works \\\n",
"46 [[, universidade estadual paulista (unesp), in... 0 \n",
"74 [[professor of finance, bryant university, smi... 34 \n",
"88 [[s.p, universitas muslim maros, maros, , id, ... 0 \n",
"98 [[lecturer, university of faisalabad, faisalab... 1 \n",
"101 [[, institut national de la recherche agronomi... 0 \n",
"... ... ... \n",
"10989595 [[mtc, polioles, mexico, , mx, , ], [head of r... 17 \n",
"10989603 [[professora de espanhol e português para estr... 7 \n",
"10989605 [[visiting scientist, north dakota state unive... 35 \n",
"10989619 [[senior researcher, university of coimbra, co... 95 \n",
"10989644 [[master, sichuan agricultural university , ch... 0 \n",
"\n",
" works_source \\\n",
"46 NaN \n",
"74 [a. can inci] \n",
"88 NaN \n",
"98 [multidisciplinary digital publishing institute] \n",
"101 NaN \n",
"... ... \n",
"10989595 [crossref metadata search, scopus - elsevier, ... \n",
"10989603 [luana arrial bastos] \n",
"10989605 [crossref metadata search, scopus - elsevier, ... \n",
"10989619 [ciênciavitae, scopus - elsevier, pg cardoso, ... \n",
"10989644 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"46 2018-08-09 12:12:24.405000+00:00 2020-04-22 01:38:03.184000+00:00 \n",
"74 2018-01-20 02:58:05.199000+00:00 2020-06-16 12:35:09.403000+00:00 \n",
"88 2018-02-12 02:08:37.018000+00:00 2018-02-12 02:22:33.378000+00:00 \n",
"98 2017-07-06 10:29:17.738000+00:00 2020-08-01 05:18:53.393000+00:00 \n",
"101 2018-02-15 09:54:59.943000+00:00 2018-02-15 10:19:27.869000+00:00 \n",
"... ... ... \n",
"10989595 2013-07-09 14:39:30.950000+00:00 2020-12-10 17:42:20.176000+00:00 \n",
"10989603 2017-05-11 13:14:59.372000+00:00 2020-12-08 20:18:24.163000+00:00 \n",
"10989605 2014-02-19 08:15:15.698000+00:00 2020-12-09 18:14:17.963000+00:00 \n",
"10989619 2013-11-26 10:59:34.331000+00:00 2020-12-02 15:28:26.221000+00:00 \n",
"10989644 2020-12-11 02:11:51.808000+00:00 2020-12-11 03:25:28.263000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"46 0 0 0 0 False NaN \n",
"74 0 0 0 0 False NaN \n",
"88 0 0 0 0 False NaN \n",
"98 1 0 0 0 True NaN \n",
"101 0 0 0 0 False NaN \n",
"... ... ... ... ... ... ... \n",
"10989595 17 0 0 29 False NaN \n",
"10989603 0 0 0 0 False NaN \n",
"10989605 28 0 11 17 True NaN \n",
"10989619 90 0 0 42 False NaN \n",
"10989644 0 0 0 0 False NaN \n",
"\n",
" other_email_domains \\\n",
"46 NaN \n",
"74 NaN \n",
"88 NaN \n",
"98 NaN \n",
"101 NaN \n",
"... ... \n",
"10989595 NaN \n",
"10989603 NaN \n",
"10989605 NaN \n",
"10989619 NaN \n",
"10989644 NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"46 [cnpq.br, linkedin.com] <NA> 2 \n",
"74 NaN <NA> <NA> \n",
"88 NaN <NA> <NA> \n",
"98 NaN <NA> <NA> \n",
"101 NaN <NA> <NA> \n",
"... ... ... ... \n",
"10989595 [linkedin.com, google.com, researchgate.net] <NA> 3 \n",
"10989603 [unidospelasaude.com.br, facebook.com, faceboo... <NA> 4 \n",
"10989605 NaN <NA> <NA> \n",
"10989619 NaN <NA> <NA> \n",
"10989644 NaN <NA> <NA> \n",
"\n",
" n_ids n_keywords n_education n_employment \\\n",
"46 1 3 1 1 \n",
"74 2 <NA> 4 5 \n",
"88 <NA> <NA> 1 1 \n",
"98 <NA> <NA> 1 1 \n",
"101 <NA> <NA> 1 1 \n",
"... ... ... ... ... \n",
"10989595 2 5 3 3 \n",
"10989603 <NA> 2 4 3 \n",
"10989605 2 5 7 8 \n",
"10989619 3 7 1 3 \n",
"10989644 <NA> <NA> 2 1 \n",
"\n",
" ext_works_source \\\n",
"46 NaN \n",
"74 [] \n",
"88 NaN \n",
"98 [multidisciplinary digital publishing institute] \n",
"101 NaN \n",
"... ... \n",
"10989595 [crossref metadata search, scopus - elsevier] \n",
"10989603 [] \n",
"10989605 [crossref metadata search, scopus - elsevier, ... \n",
"10989619 [ciênciavitae, scopus - elsevier, pg cardoso, ... \n",
"10989644 NaN \n",
"\n",
" n_ext_work_source authoritative n_valid_education \n",
"46 <NA> False 0.0 \n",
"74 0 False 0.0 \n",
"88 <NA> False 0.0 \n",
"98 1 True 0.0 \n",
"101 <NA> False 0.0 \n",
"... ... ... ... \n",
"10989595 2 True 0.0 \n",
"10989603 0 False 3.0 \n",
"10989605 4 True 4.0 \n",
"10989619 4 True 0.0 \n",
"10989644 <NA> False 1.0 \n",
"\n",
"[641229 rows x 34 columns]"
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.n_education != df.n_valid_education]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Employment"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 2.680488e+06\n",
"mean 1.664713e+00\n",
"std 1.530077e+00\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 1.980000e+02\n",
"Name: n_employment, dtype: float64"
]
},
"execution_count": 69,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distribution of the number of employment entries recorded per profile.\n",
"df['n_employment'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" <th>n_valid_education</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>8289432</th>\n",
" <td>0000-0002-0293-964X</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>ben zhong</td>\n",
" <td>tang</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[唐本忠]</td>\n",
" <td>tangbenz@ust.hk</td>\n",
" <td>[nanotechnology, fluorescent biosensors, optic...</td>\n",
" <td>[[hkust profile, tang-benzhong], [researcherid...</td>\n",
" <td>[[department of chemistry and faculty of pharm...</td>\n",
" <td>[[chair professor, division of biomedical engi...</td>\n",
" <td>422</td>\n",
" <td>[tang, benzhong, crossref]</td>\n",
" <td>2015-03-13 00:28:33.270000+00:00</td>\n",
" <td>2021-03-23 07:56:34.824000+00:00</td>\n",
" <td>359</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>ust.hk</td>\n",
" <td>NaN</td>\n",
" <td>[ust.hk]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>198</td>\n",
" <td>[crossref]</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"8289432 0000-0002-0293-964X True True \n",
"\n",
" given_names family_name biography other_names primary_email \\\n",
"8289432 ben zhong tang <NA> [唐本忠] tangbenz@ust.hk \n",
"\n",
" keywords \\\n",
"8289432 [nanotechnology, fluorescent biosensors, optic... \n",
"\n",
" external_ids \\\n",
"8289432 [[hkust profile, tang-benzhong], [researcherid... \n",
"\n",
" education \\\n",
"8289432 [[department of chemistry and faculty of pharm... \n",
"\n",
" employment n_works \\\n",
"8289432 [[chair professor, division of biomedical engi... 422 \n",
"\n",
" works_source activation_date \\\n",
"8289432 [tang, benzhong, crossref] 2015-03-13 00:28:33.270000+00:00 \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids \\\n",
"8289432 2021-03-23 07:56:34.824000+00:00 359 0 0 0 \n",
"\n",
" label primary_email_domain other_email_domains url_domains n_emails \\\n",
"8289432 False ust.hk NaN [ust.hk] <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \\\n",
"8289432 1 3 7 7 198 \n",
"\n",
" ext_works_source n_ext_work_source authoritative n_valid_education \n",
"8289432 [crossref] 1 True 3.0 "
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the profile(s) holding the highest employment count.\n",
"max_n_employment = df['n_employment'].max()\n",
"df[df['n_employment'] == max_n_employment]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's count how many employment entries have a valid organisation identifier assigned by ORCID (Ringgold, ISNI, GRID, etc.)."
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-6112-5550</td>\n",
" <td>[professor, peoples friendship university of r...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-6220-5683</td>\n",
" <td>[research scientist, new york university abu d...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>[researcher (academic), universidad de zaragoz...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>[researcher (academic), instituto de síntesis ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0000-0001-7402-0096</td>\n",
" <td>[, kth royal institute of technology, stockhol...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989643</th>\n",
" <td>0000-0003-2606-0936</td>\n",
" <td>[post-doc, institute of biochemistry and cell ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989644</th>\n",
" <td>0000-0002-1686-1935</td>\n",
" <td>[master, sichuan agricultural university , che...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>[assistant professor, baruch college, city uni...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>[postdoctoral scholar, university of californi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989647</th>\n",
" <td>0000-0002-7584-2283</td>\n",
" <td>[lecturer, henan institute of science and tech...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>4462243 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid \\\n",
"1 0000-0001-6112-5550 \n",
"3 0000-0001-6220-5683 \n",
"4 0000-0001-7071-8294 \n",
"4 0000-0001-7071-8294 \n",
"6 0000-0001-7402-0096 \n",
"... ... \n",
"10989643 0000-0003-2606-0936 \n",
"10989644 0000-0002-1686-1935 \n",
"10989645 0000-0002-3800-6331 \n",
"10989645 0000-0002-3800-6331 \n",
"10989647 0000-0002-7584-2283 \n",
"\n",
" employment \n",
"1 [professor, peoples friendship university of r... \n",
"3 [research scientist, new york university abu d... \n",
"4 [researcher (academic), universidad de zaragoz... \n",
"4 [researcher (academic), instituto de síntesis ... \n",
"6 [, kth royal institute of technology, stockhol... \n",
"... ... \n",
"10989643 [post-doc, institute of biochemistry and cell ... \n",
"10989644 [master, sichuan agricultural university , che... \n",
"10989645 [assistant professor, baruch college, city uni... \n",
"10989645 [postdoctoral scholar, university of californi... \n",
"10989647 [lecturer, henan institute of science and tech... \n",
"\n",
"[4462243 rows x 2 columns]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per (orcid, employment entry); rows with a missing orcid or employment are dropped.\n",
"exploded_employment = (\n",
"    df.loc[:, ['orcid', 'employment']]\n",
"    .explode('employment')\n",
"    .dropna()\n",
")\n",
"exploded_employment"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"# Unpack each employment entry (a positional list) into one named column per field.\n",
"# NOTE(review): assumes every entry has exactly these seven fields, in this order.\n",
"employment_columns = ['role', 'institution', 'city', 'region', 'country', 'id', 'id_scheme']\n",
"employment_values = pd.DataFrame(exploded_employment['employment'].tolist(), index=exploded_employment.index)\n",
"exploded_employment[employment_columns] = employment_values"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"# Treat empty id strings as missing so the per-profile count() below ignores them.\n",
"# Assign the result back: an inplace replace on a column accessor is chained assignment\n",
"# and leaves the frame untouched when pandas Copy-on-Write is active.\n",
"exploded_employment['id'] = exploded_employment['id'].replace('', pd.NA)"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0000-0001-5000-0031</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0000-0001-5000-0138</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0000-0001-5000-0170</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-5000-0218</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-5000-0226</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680483</th>\n",
" <td>0000-0003-4999-9831</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680484</th>\n",
" <td>0000-0003-4999-9890</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680485</th>\n",
" <td>0000-0003-4999-992X</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680486</th>\n",
" <td>0000-0003-4999-9938</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2680487</th>\n",
" <td>0000-0003-4999-9954</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2680488 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid id\n",
"0 0000-0001-5000-0031 1\n",
"1 0000-0001-5000-0138 1\n",
"2 0000-0001-5000-0170 2\n",
"3 0000-0001-5000-0218 1\n",
"4 0000-0001-5000-0226 1\n",
"... ... ..\n",
"2680483 0000-0003-4999-9831 1\n",
"2680484 0000-0003-4999-9890 1\n",
"2680485 0000-0003-4999-992X 0\n",
"2680486 0000-0003-4999-9938 1\n",
"2680487 0000-0003-4999-9954 2\n",
"\n",
"[2680488 rows x 2 columns]"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of employment entries with a non-missing id, per ORCID.\n",
"exploded_employment.groupby('orcid')['id'].count().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [],
"source": [
"# Attach the per-profile count of employments that carry an id as n_valid_employment.\n",
"# Profiles with no employment entries have no match in the left join and get NaN.\n",
"valid_employment_counts = (\n",
"    exploded_employment.groupby('orcid')['id']\n",
"    .count()\n",
"    .rename('n_valid_employment')\n",
"    .reset_index()\n",
")\n",
"# Drop the column first so re-running this cell does not create a duplicate n_valid_employment.\n",
"df = df.drop(columns='n_valid_employment', errors='ignore').merge(valid_employment_counts, on='orcid', how='left')"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" <th>n_valid_education</th>\n",
" <th>n_valid_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0000-0001-6220-5683</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, new york university abu ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-08-18 12:36:45.307000+00:00</td>\n",
" <td>2020-09-23 13:37:54.180000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0000-0001-7071-8294</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-03-10 13:22:01.966000+00:00</td>\n",
" <td>2016-06-14 22:17:54.470000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0000-0001-7402-0096</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, kth royal institute of technology, stockho...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-11 15:13:06.467000+00:00</td>\n",
" <td>2016-06-14 23:55:59.896000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[kth.se]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0000-0001-8377-3508</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[fontana, milena da silva]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[educação; informática; matemática.]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, instituto federal de educação, ciência e t...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-05-23 23:39:04.534000+00:00</td>\n",
" <td>2019-10-16 02:50:11.007000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>38</th>\n",
" <td>0000-0002-6508-6998</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-03-12 08:23:22.492000+00:00</td>\n",
" <td>2015-07-27 15:51:38.411000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989632</th>\n",
" <td>0000-0001-9133-2366</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>søren</td>\n",
" <td>staugaard</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, , aarhus universitet, aarhus, , dk, 1006, ...</td>\n",
" <td>[[, aarhus university, aarhus c, , dk, , ], [s...</td>\n",
" <td>29</td>\n",
" <td>[aarhus university, crossref]</td>\n",
" <td>2013-03-19 11:34:48.477000+00:00</td>\n",
" <td>2020-12-07 08:03:23.190000+00:00</td>\n",
" <td>14</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>35</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[au.dk, au.dk]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>[aarhus university, crossref]</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989634</th>\n",
" <td>0000-0001-8494-2123</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>tarun</td>\n",
" <td>jain</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[pet/ct specialist; nuclear medicine physician...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[assistant professor, mahatma gandhi medical ...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2014-12-19 08:21:46.292000+00:00</td>\n",
" <td>2020-12-09 06:03:57.055000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989636</th>\n",
" <td>0000-0002-2906-0299</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>tiffany</td>\n",
" <td>mackay</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[tiffany russel sia]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[prostate cancer, oxytocin, pet/ct, gpc-1, gal...</td>\n",
" <td>[[researcherid, a-2121-2017]]</td>\n",
" <td>[[faculty of medicine, master in pharmaceutica...</td>\n",
" <td>[[clinical project lead, minomic international...</td>\n",
" <td>11</td>\n",
" <td>[crossref, researcherid, tiffany mackay]</td>\n",
" <td>2017-01-03 23:28:48.736000+00:00</td>\n",
" <td>2020-12-09 17:12:20.326000+00:00</td>\n",
" <td>11</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[oxytocin.com.au, linkedin.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>13</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>[crossref, researcherid]</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989639</th>\n",
" <td>0000-0002-4422-4036</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>vijay</td>\n",
" <td>krishnan</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[psychiatry, md, all india institute of medic...</td>\n",
" <td>[[assistant professor, all india institute of ...</td>\n",
" <td>2</td>\n",
" <td>[crossref]</td>\n",
" <td>2015-05-28 17:24:39.519000+00:00</td>\n",
" <td>2020-11-24 08:57:22.875000+00:00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>[crossref]</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989645</th>\n",
" <td>0000-0002-3800-6331</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>zachary</td>\n",
" <td>calamari</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[richard gilder graduate school, phd in compa...</td>\n",
" <td>[[assistant professor, baruch college, city un...</td>\n",
" <td>7</td>\n",
" <td>[crossref metadata search, zachary t. calamari...</td>\n",
" <td>2015-01-20 20:20:17.042000+00:00</td>\n",
" <td>2020-11-21 19:48:36.221000+00:00</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>[crossref metadata search, crossref]</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1036967 rows × 35 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"3 0000-0001-6220-5683 True True \n",
"4 0000-0001-7071-8294 True True \n",
"6 0000-0001-7402-0096 True True \n",
"11 0000-0001-8377-3508 True True \n",
"38 0000-0002-6508-6998 True True \n",
"... ... ... ... \n",
"10989632 0000-0001-9133-2366 True True \n",
"10989634 0000-0001-8494-2123 True True \n",
"10989636 0000-0002-2906-0299 True True \n",
"10989639 0000-0002-4422-4036 True True \n",
"10989645 0000-0002-3800-6331 True True \n",
"\n",
" given_names family_name biography other_names \\\n",
"3 <NA> <NA> <NA> NaN \n",
"4 <NA> <NA> <NA> NaN \n",
"6 <NA> <NA> <NA> NaN \n",
"11 <NA> <NA> <NA> [fontana, milena da silva] \n",
"38 <NA> <NA> <NA> NaN \n",
"... ... ... ... ... \n",
"10989632 søren staugaard <NA> NaN \n",
"10989634 tarun jain <NA> NaN \n",
"10989636 tiffany mackay <NA> [tiffany russel sia] \n",
"10989639 vijay krishnan <NA> NaN \n",
"10989645 zachary calamari <NA> NaN \n",
"\n",
" primary_email keywords \\\n",
"3 <NA> NaN \n",
"4 <NA> NaN \n",
"6 <NA> NaN \n",
"11 <NA> [educação; informática; matemática.] \n",
"38 <NA> NaN \n",
"... ... ... \n",
"10989632 <NA> NaN \n",
"10989634 <NA> [pet/ct specialist; nuclear medicine physician... \n",
"10989636 <NA> [prostate cancer, oxytocin, pet/ct, gpc-1, gal... \n",
"10989639 <NA> NaN \n",
"10989645 <NA> NaN \n",
"\n",
" external_ids \\\n",
"3 NaN \n",
"4 NaN \n",
"6 NaN \n",
"11 NaN \n",
"38 NaN \n",
"... ... \n",
"10989632 NaN \n",
"10989634 NaN \n",
"10989636 [[researcherid, a-2121-2017]] \n",
"10989639 NaN \n",
"10989645 NaN \n",
"\n",
" education \\\n",
"3 NaN \n",
"4 NaN \n",
"6 NaN \n",
"11 NaN \n",
"38 NaN \n",
"... ... \n",
"10989632 [[, , aarhus universitet, aarhus, , dk, 1006, ... \n",
"10989634 NaN \n",
"10989636 [[faculty of medicine, master in pharmaceutica... \n",
"10989639 [[psychiatry, md, all india institute of medic... \n",
"10989645 [[richard gilder graduate school, phd in compa... \n",
"\n",
" employment n_works \\\n",
"3 [[research scientist, new york university abu ... 0 \n",
"4 [[researcher (academic), universidad de zarago... 0 \n",
"6 [[, kth royal institute of technology, stockho... 0 \n",
"11 [[, instituto federal de educação, ciência e t... 0 \n",
"38 [[researcher (academic), universidad de zarago... 0 \n",
"... ... ... \n",
"10989632 [[, aarhus university, aarhus c, , dk, , ], [s... 29 \n",
"10989634 [[assistant professor, mahatma gandhi medical ... 0 \n",
"10989636 [[clinical project lead, minomic international... 11 \n",
"10989639 [[assistant professor, all india institute of ... 2 \n",
"10989645 [[assistant professor, baruch college, city un... 7 \n",
"\n",
" works_source \\\n",
"3 NaN \n",
"4 NaN \n",
"6 NaN \n",
"11 NaN \n",
"38 NaN \n",
"... ... \n",
"10989632 [aarhus university, crossref] \n",
"10989634 NaN \n",
"10989636 [crossref, researcherid, tiffany mackay] \n",
"10989639 [crossref] \n",
"10989645 [crossref metadata search, zachary t. calamari... \n",
"\n",
" activation_date last_update_date \\\n",
"3 2015-08-18 12:36:45.307000+00:00 2020-09-23 13:37:54.180000+00:00 \n",
"4 2014-03-10 13:22:01.966000+00:00 2016-06-14 22:17:54.470000+00:00 \n",
"6 2015-01-11 15:13:06.467000+00:00 2016-06-14 23:55:59.896000+00:00 \n",
"11 2018-05-23 23:39:04.534000+00:00 2019-10-16 02:50:11.007000+00:00 \n",
"38 2014-03-12 08:23:22.492000+00:00 2015-07-27 15:51:38.411000+00:00 \n",
"... ... ... \n",
"10989632 2013-03-19 11:34:48.477000+00:00 2020-12-07 08:03:23.190000+00:00 \n",
"10989634 2014-12-19 08:21:46.292000+00:00 2020-12-09 06:03:57.055000+00:00 \n",
"10989636 2017-01-03 23:28:48.736000+00:00 2020-12-09 17:12:20.326000+00:00 \n",
"10989639 2015-05-28 17:24:39.519000+00:00 2020-11-24 08:57:22.875000+00:00 \n",
"10989645 2015-01-20 20:20:17.042000+00:00 2020-11-21 19:48:36.221000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"3 0 0 0 0 False NaN \n",
"4 0 0 0 0 False NaN \n",
"6 0 0 0 0 False NaN \n",
"11 0 0 0 0 False NaN \n",
"38 0 0 0 0 False NaN \n",
"... ... ... ... ... ... ... \n",
"10989632 14 0 10 35 True NaN \n",
"10989634 0 0 0 0 False NaN \n",
"10989636 11 0 0 0 True NaN \n",
"10989639 2 0 0 0 False NaN \n",
"10989645 7 0 1 0 True NaN \n",
"\n",
" other_email_domains url_domains n_emails \\\n",
"3 NaN NaN <NA> \n",
"4 NaN NaN <NA> \n",
"6 NaN [kth.se] <NA> \n",
"11 NaN [cnpq.br] <NA> \n",
"38 NaN NaN <NA> \n",
"... ... ... ... \n",
"10989632 NaN [au.dk, au.dk] <NA> \n",
"10989634 NaN NaN <NA> \n",
"10989636 NaN [oxytocin.com.au, linkedin.com] <NA> \n",
"10989639 NaN NaN <NA> \n",
"10989645 NaN NaN <NA> \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \\\n",
"3 <NA> <NA> <NA> <NA> 1 \n",
"4 <NA> <NA> <NA> <NA> 2 \n",
"6 1 <NA> <NA> <NA> 1 \n",
"11 1 <NA> 1 <NA> 3 \n",
"38 <NA> <NA> <NA> <NA> 2 \n",
"... ... ... ... ... ... \n",
"10989632 2 <NA> <NA> 1 3 \n",
"10989634 <NA> <NA> 1 <NA> 5 \n",
"10989636 2 1 13 2 4 \n",
"10989639 <NA> <NA> <NA> 2 5 \n",
"10989645 <NA> <NA> <NA> 2 2 \n",
"\n",
" ext_works_source n_ext_work_source \\\n",
"3 NaN <NA> \n",
"4 NaN <NA> \n",
"6 NaN <NA> \n",
"11 NaN <NA> \n",
"38 NaN <NA> \n",
"... ... ... \n",
"10989632 [aarhus university, crossref] 2 \n",
"10989634 NaN <NA> \n",
"10989636 [crossref, researcherid] 2 \n",
"10989639 [crossref] 1 \n",
"10989645 [crossref metadata search, crossref] 2 \n",
"\n",
" authoritative n_valid_education n_valid_employment \n",
"3 False NaN 0.0 \n",
"4 False NaN 1.0 \n",
"6 False NaN 0.0 \n",
"11 False NaN 0.0 \n",
"38 False NaN 1.0 \n",
"... ... ... ... \n",
"10989632 True 1.0 1.0 \n",
"10989634 False NaN 4.0 \n",
"10989636 True 2.0 1.0 \n",
"10989639 True 2.0 3.0 \n",
"10989645 True 2.0 0.0 \n",
"\n",
"[1036967 rows x 35 columns]"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows where n_employment and n_valid_employment disagree\n",
"df.loc[df['n_employment'] != df['n_valid_employment']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Biography"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO:\n",
"- temporal correlation\n",
"- peaks in account creation (daily)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"# Treat empty-string biographies as missing values.\n",
"# Reassign the column instead of calling replace(..., inplace=True) on df.biography:\n",
"# the in-place form is a chained assignment that does not reach df under pandas\n",
"# Copy-on-Write, and the np.NaN alias no longer exists in NumPy 2.x.\n",
"df['biography'] = df['biography'].replace('', np.nan)"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 354015\n",
"unique 337007\n",
"top car title loans are a more straightforward way...\n",
"freq 343\n",
"Name: biography, dtype: object"
]
},
"execution_count": 78,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics (count / unique / top / freq) of the biography column\n",
"df['biography'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Duplicated bios**"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" <th>n_valid_education</th>\n",
" <th>n_valid_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>51306</th>\n",
" <td>0000-0002-7397-7977</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan upland]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-06 06:10:20.070000+00:00</td>\n",
" <td>2020-11-06 06:24:28.005000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51307</th>\n",
" <td>0000-0003-4931-9736</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan saratoga]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-13 01:04:19.859000+00:00</td>\n",
" <td>2020-11-13 01:15:12.546000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>106024</th>\n",
" <td>0000-0001-8221-2303</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan victorville]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-05 00:38:21.096000+00:00</td>\n",
" <td>2020-11-05 00:40:40.091000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108770</th>\n",
" <td>0000-0001-6736-072X</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-08 05:38:30.786000+00:00</td>\n",
" <td>2020-12-08 05:40:03.786000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>108771</th>\n",
" <td>0000-0002-8727-1246</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[loan agency]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[title loan on car, car title loan north ogden...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-10 08:54:56.127000+00:00</td>\n",
" <td>2020-12-10 08:57:15.791000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>4</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10875416</th>\n",
" <td>0000-0002-9640-8136</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan clovis]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-22 06:11:02.945000+00:00</td>\n",
" <td>2020-10-22 06:17:09.111000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10878239</th>\n",
" <td>0000-0002-6926-3752</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan escondido]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-03 02:00:33.684000+00:00</td>\n",
" <td>2020-12-03 02:02:07.054000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10933380</th>\n",
" <td>0000-0002-3655-4713</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan san rafael]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-18 00:39:17.492000+00:00</td>\n",
" <td>2020-11-18 00:52:19.024000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10933381</th>\n",
" <td>0000-0002-8724-1020</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan san juan capistrano]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-19 00:31:54.080000+00:00</td>\n",
" <td>2020-11-19 00:34:08.721000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10985986</th>\n",
" <td>0000-0002-4601-4569</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>[car title loan mount pleasant]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-16 00:32:26.207000+00:00</td>\n",
" <td>2020-10-16 00:37:42.646000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>421 rows × 35 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"51306 0000-0002-7397-7977 True True \n",
"51307 0000-0003-4931-9736 True True \n",
"106024 0000-0001-8221-2303 True True \n",
"108770 0000-0001-6736-072X True True \n",
"108771 0000-0002-8727-1246 True True \n",
"... ... ... ... \n",
"10875416 0000-0002-9640-8136 True True \n",
"10878239 0000-0002-6926-3752 True True \n",
"10933380 0000-0002-3655-4713 True True \n",
"10933381 0000-0002-8724-1020 True True \n",
"10985986 0000-0002-4601-4569 True True \n",
"\n",
" given_names family_name \\\n",
"51306 premium car title loans \n",
"51307 premium car title loans \n",
"106024 premium car title loans \n",
"108770 premium car title loans \n",
"108771 premium car title loans \n",
"... ... ... \n",
"10875416 premium car title loans \n",
"10878239 premium car title loans \n",
"10933380 premium car title loans \n",
"10933381 premium car title loans \n",
"10985986 premium car title loans \n",
"\n",
" biography \\\n",
"51306 car title loans are a more straightforward way... \n",
"51307 car title loans are a more straightforward way... \n",
"106024 car title loans are a more straightforward way... \n",
"108770 car title loans are a more straightforward way... \n",
"108771 car title loans are a more straightforward way... \n",
"... ... \n",
"10875416 car title loans are a more straightforward way... \n",
"10878239 car title loans are a more straightforward way... \n",
"10933380 car title loans are a more straightforward way... \n",
"10933381 car title loans are a more straightforward way... \n",
"10985986 car title loans are a more straightforward way... \n",
"\n",
" other_names primary_email \\\n",
"51306 [premium car title loans] <NA> \n",
"51307 [premium car title loans] <NA> \n",
"106024 [premium car title loans] <NA> \n",
"108770 NaN <NA> \n",
"108771 [loan agency] <NA> \n",
"... ... ... \n",
"10875416 [premium car title loans] <NA> \n",
"10878239 [premium car title loans] <NA> \n",
"10933380 [premium car title loans] <NA> \n",
"10933381 [premium car title loans] <NA> \n",
"10985986 [premium car title loans] <NA> \n",
"\n",
" keywords external_ids \\\n",
"51306 [car title loan upland] NaN \n",
"51307 [car title loan saratoga] NaN \n",
"106024 [car title loan victorville] NaN \n",
"108770 NaN NaN \n",
"108771 [title loan on car, car title loan north ogden... NaN \n",
"... ... ... \n",
"10875416 [car title loan clovis] NaN \n",
"10878239 [car title loan escondido] NaN \n",
"10933380 [car title loan san rafael] NaN \n",
"10933381 [car title loan san juan capistrano] NaN \n",
"10985986 [car title loan mount pleasant] NaN \n",
"\n",
" education employment n_works works_source \\\n",
"51306 NaN NaN 0 NaN \n",
"51307 NaN NaN 0 NaN \n",
"106024 NaN NaN 0 NaN \n",
"108770 NaN NaN 0 NaN \n",
"108771 NaN NaN 0 NaN \n",
"... ... ... ... ... \n",
"10875416 NaN NaN 0 NaN \n",
"10878239 NaN NaN 0 NaN \n",
"10933380 NaN NaN 0 NaN \n",
"10933381 NaN NaN 0 NaN \n",
"10985986 NaN NaN 0 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"51306 2020-11-06 06:10:20.070000+00:00 2020-11-06 06:24:28.005000+00:00 \n",
"51307 2020-11-13 01:04:19.859000+00:00 2020-11-13 01:15:12.546000+00:00 \n",
"106024 2020-11-05 00:38:21.096000+00:00 2020-11-05 00:40:40.091000+00:00 \n",
"108770 2020-12-08 05:38:30.786000+00:00 2020-12-08 05:40:03.786000+00:00 \n",
"108771 2020-12-10 08:54:56.127000+00:00 2020-12-10 08:57:15.791000+00:00 \n",
"... ... ... \n",
"10875416 2020-10-22 06:11:02.945000+00:00 2020-10-22 06:17:09.111000+00:00 \n",
"10878239 2020-12-03 02:00:33.684000+00:00 2020-12-03 02:02:07.054000+00:00 \n",
"10933380 2020-11-18 00:39:17.492000+00:00 2020-11-18 00:52:19.024000+00:00 \n",
"10933381 2020-11-19 00:31:54.080000+00:00 2020-11-19 00:34:08.721000+00:00 \n",
"10985986 2020-10-16 00:32:26.207000+00:00 2020-10-16 00:37:42.646000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"51306 0 0 0 0 False NaN \n",
"51307 0 0 0 0 False NaN \n",
"106024 0 0 0 0 False NaN \n",
"108770 0 0 0 0 False NaN \n",
"108771 0 0 0 0 False NaN \n",
"... ... ... ... ... ... ... \n",
"10875416 0 0 0 0 False NaN \n",
"10878239 0 0 0 0 False NaN \n",
"10933380 0 0 0 0 False NaN \n",
"10933381 0 0 0 0 False NaN \n",
"10985986 0 0 0 0 False NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls \\\n",
"51306 NaN [premiumcartitleloans.com] <NA> 1 \n",
"51307 NaN [premiumcartitleloans.com] <NA> 1 \n",
"106024 NaN [premiumcartitleloans.com] <NA> 1 \n",
"108770 NaN [premiumcartitleloans.com] <NA> 1 \n",
"108771 NaN [premiumcartitleloans.com] <NA> 1 \n",
"... ... ... ... ... \n",
"10875416 NaN [premiumcartitleloans.com] <NA> 1 \n",
"10878239 NaN [premiumcartitleloans.com] <NA> 1 \n",
"10933380 NaN [premiumcartitleloans.com] <NA> 1 \n",
"10933381 NaN [premiumcartitleloans.com] <NA> 1 \n",
"10985986 NaN [premiumcartitleloans.com] <NA> 1 \n",
"\n",
" n_ids n_keywords n_education n_employment ext_works_source \\\n",
"51306 <NA> 1 <NA> <NA> NaN \n",
"51307 <NA> 1 <NA> <NA> NaN \n",
"106024 <NA> 1 <NA> <NA> NaN \n",
"108770 <NA> <NA> <NA> <NA> NaN \n",
"108771 <NA> 4 <NA> <NA> NaN \n",
"... ... ... ... ... ... \n",
"10875416 <NA> 1 <NA> <NA> NaN \n",
"10878239 <NA> 1 <NA> <NA> NaN \n",
"10933380 <NA> 1 <NA> <NA> NaN \n",
"10933381 <NA> 1 <NA> <NA> NaN \n",
"10985986 <NA> 1 <NA> <NA> NaN \n",
"\n",
" n_ext_work_source authoritative n_valid_education \\\n",
"51306 <NA> False NaN \n",
"51307 <NA> False NaN \n",
"106024 <NA> False NaN \n",
"108770 <NA> False NaN \n",
"108771 <NA> False NaN \n",
"... ... ... ... \n",
"10875416 <NA> False NaN \n",
"10878239 <NA> False NaN \n",
"10933380 <NA> False NaN \n",
"10933381 <NA> False NaN \n",
"10985986 <NA> False NaN \n",
"\n",
" n_valid_employment \n",
"51306 NaN \n",
"51307 NaN \n",
"106024 NaN \n",
"108770 NaN \n",
"108771 NaN \n",
"... ... \n",
"10875416 NaN \n",
"10878239 NaN \n",
"10933380 NaN \n",
"10933381 NaN \n",
"10985986 NaN \n",
"\n",
"[421 rows x 35 columns]"
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profiles whose biography contains the car-title-loan boilerplate;\n",
"# na=False makes missing biographies count as non-matching.\n",
"df[df['biography'].str.contains('car title loans are a more straightforward', na=False, regex=False)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's note down the ORCID iDs of these profiles in `FAKE_HEAP`"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [],
"source": [
"# Record in FAKE_HEAP the ORCID iD of every profile whose biography contains the\n",
"# car-title-loan boilerplate, under the keys 'carloan_0', 'carloan_1', ...\n",
"# na=False makes missing biographies count as non-matching.\n",
"carloan_mask = df['biography'].str.contains('car title loans are a more straightforward', na=False, regex=False)\n",
"for i, orcid in enumerate(df.loc[carloan_mask, 'orcid']):\n",
"    FAKE_HEAP['carloan_' + str(i)] = orcid"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's dig deeper into duplicated bios"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>ext_works_source</th>\n",
" <th>n_ext_work_source</th>\n",
" <th>authoritative</th>\n",
" <th>n_valid_education</th>\n",
" <th>n_valid_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>613</th>\n",
" <td>0000-0001-6750-1481</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>aesthetic</td>\n",
" <td>record</td>\n",
" <td>make your practice easy with a professional so...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-28 09:13:52.705000+00:00</td>\n",
" <td>2020-09-28 09:17:36.855000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[aestheticrecord.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1145</th>\n",
" <td>0000-0001-9243-2342</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>alfonso</td>\n",
" <td>ruiz-bravo lopez</td>\n",
" <td>personal docente e investigador de la universi...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, universidad de granada, granada, andalucia...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2015-01-19 13:01:31.041000+00:00</td>\n",
" <td>2020-03-10 10:17:07.174000+00:00</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[ugr.es]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1519</th>\n",
" <td>0000-0002-3192-1481</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>amy</td>\n",
" <td>spahn</td>\n",
" <td>research administrator</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[graduate studies, master of science in admin...</td>\n",
" <td>[[research and program administrator, michigan...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-05-04 18:55:27.062000+00:00</td>\n",
" <td>2020-08-04 14:52:03.330000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>2.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2269</th>\n",
" <td>0000-0002-6104-6550</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>antonio</td>\n",
" <td>martinez</td>\n",
" <td>personal docente e investigador de la universi...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, e-9723-2016]]</td>\n",
" <td>[[geometria y topologia, , universidad de gran...</td>\n",
" <td>[[, universidad de granada, granada, andalucia...</td>\n",
" <td>51</td>\n",
" <td>[researcherid, crossref]</td>\n",
" <td>2015-02-08 18:57:35.978000+00:00</td>\n",
" <td>2020-07-20 07:50:58.740000+00:00</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>46</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[ugr.es]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>[researcherid, crossref]</td>\n",
" <td>2</td>\n",
" <td>True</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6163</th>\n",
" <td>0000-0003-0171-7962</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>eduardo</td>\n",
" <td>ortega bernaldo de quiros</td>\n",
" <td>personal docente e investigador de la universi...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, universidad de granada, granada, andalucia...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-06-01 08:43:00.352000+00:00</td>\n",
" <td>2017-06-19 07:44:32.649000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[ugr.es]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10988461</th>\n",
" <td>0000-0002-7632-8868</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>margarita</td>\n",
" <td>lopez-viota gallardo</td>\n",
" <td>personal docente e investigador de la universi...</td>\n",
" <td>NaN</td>\n",
" <td>mlvg@ugr.es</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, universidad de granada, granada, andalucia...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-07-29 07:17:22.959000+00:00</td>\n",
" <td>2020-11-26 19:55:46.827000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>ugr.es</td>\n",
" <td>NaN</td>\n",
" <td>[ugr.es]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10988510</th>\n",
" <td>0000-0001-5504-2767</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>maría nieves</td>\n",
" <td>gonzález pérez</td>\n",
" <td>personal docente e investigador de la universi...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lengua española, doctora en estudios lingüís...</td>\n",
" <td>[[profesor asociado, universidad de castilla-l...</td>\n",
" <td>2</td>\n",
" <td>[mla international bibliography, maría nieves ...</td>\n",
" <td>2015-07-09 23:19:08.782000+00:00</td>\n",
" <td>2020-12-08 14:01:30.548000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[uclm.es]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>[mla international bibliography]</td>\n",
" <td>1</td>\n",
" <td>True</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10988922</th>\n",
" <td>0000-0001-9501-3717</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>roxann</td>\n",
" <td>lynsey</td>\n",
" <td>een korte inleiding op waterverwarmingstoestel...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-10 21:40:39.371000+00:00</td>\n",
" <td>2020-12-10 21:49:33.129000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[zonneboilermagazijn.nl]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10988929</th>\n",
" <td>0000-0003-2323-4005</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>ruland</td>\n",
" <td>star</td>\n",
" <td>tentu saja semua permainan yang disediakan aka...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-24 13:18:00.296000+00:00</td>\n",
" <td>2020-11-24 13:21:10.985000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10989073</th>\n",
" <td>0000-0003-4244-0381</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>sumiko</td>\n",
" <td>cesar</td>\n",
" <td>welcome to my page! im still finding my way ar...</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-13 00:34:15.955000+00:00</td>\n",
" <td>2020-12-13 00:37:21.793000+00:00</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[139.59.245.36]</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>1</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>NaN</td>\n",
" <td>&lt;NA&gt;</td>\n",
" <td>False</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19571 rows × 35 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"613 0000-0001-6750-1481 True True \n",
"1145 0000-0001-9243-2342 True True \n",
"1519 0000-0002-3192-1481 True True \n",
"2269 0000-0002-6104-6550 True True \n",
"6163 0000-0003-0171-7962 True True \n",
"... ... ... ... \n",
"10988461 0000-0002-7632-8868 True True \n",
"10988510 0000-0001-5504-2767 True True \n",
"10988922 0000-0001-9501-3717 True True \n",
"10988929 0000-0003-2323-4005 True True \n",
"10989073 0000-0003-4244-0381 True True \n",
"\n",
" given_names family_name \\\n",
"613 aesthetic record \n",
"1145 alfonso ruiz-bravo lopez \n",
"1519 amy spahn \n",
"2269 antonio martinez \n",
"6163 eduardo ortega bernaldo de quiros \n",
"... ... ... \n",
"10988461 margarita lopez-viota gallardo \n",
"10988510 maría nieves gonzález pérez \n",
"10988922 roxann lynsey \n",
"10988929 ruland star \n",
"10989073 sumiko cesar \n",
"\n",
" biography other_names \\\n",
"613 make your practice easy with a professional so... NaN \n",
"1145 personal docente e investigador de la universi... NaN \n",
"1519 research administrator NaN \n",
"2269 personal docente e investigador de la universi... NaN \n",
"6163 personal docente e investigador de la universi... NaN \n",
"... ... ... \n",
"10988461 personal docente e investigador de la universi... NaN \n",
"10988510 personal docente e investigador de la universi... NaN \n",
"10988922 een korte inleiding op waterverwarmingstoestel... NaN \n",
"10988929 tentu saja semua permainan yang disediakan aka... NaN \n",
"10989073 welcome to my page! im still finding my way ar... NaN \n",
"\n",
" primary_email keywords external_ids \\\n",
"613 <NA> NaN NaN \n",
"1145 <NA> NaN NaN \n",
"1519 <NA> NaN NaN \n",
"2269 <NA> NaN [[researcherid, e-9723-2016]] \n",
"6163 <NA> NaN NaN \n",
"... ... ... ... \n",
"10988461 mlvg@ugr.es NaN NaN \n",
"10988510 <NA> NaN NaN \n",
"10988922 <NA> NaN NaN \n",
"10988929 <NA> NaN NaN \n",
"10989073 <NA> NaN NaN \n",
"\n",
" education \\\n",
"613 NaN \n",
"1145 NaN \n",
"1519 [[graduate studies, master of science in admin... \n",
"2269 [[geometria y topologia, , universidad de gran... \n",
"6163 NaN \n",
"... ... \n",
"10988461 NaN \n",
"10988510 [[lengua española, doctora en estudios lingüís... \n",
"10988922 NaN \n",
"10988929 NaN \n",
"10989073 NaN \n",
"\n",
" employment n_works \\\n",
"613 NaN 0 \n",
"1145 [[, universidad de granada, granada, andalucia... 1 \n",
"1519 [[research and program administrator, michigan... 0 \n",
"2269 [[, universidad de granada, granada, andalucia... 51 \n",
"6163 [[, universidad de granada, granada, andalucia... 0 \n",
"... ... ... \n",
"10988461 [[, universidad de granada, granada, andalucia... 0 \n",
"10988510 [[profesor asociado, universidad de castilla-l... 2 \n",
"10988922 NaN 0 \n",
"10988929 NaN 0 \n",
"10989073 NaN 0 \n",
"\n",
" works_source \\\n",
"613 NaN \n",
"1145 [crossref] \n",
"1519 NaN \n",
"2269 [researcherid, crossref] \n",
"6163 NaN \n",
"... ... \n",
"10988461 NaN \n",
"10988510 [mla international bibliography, maría nieves ... \n",
"10988922 NaN \n",
"10988929 NaN \n",
"10989073 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"613 2020-09-28 09:13:52.705000+00:00 2020-09-28 09:17:36.855000+00:00 \n",
"1145 2015-01-19 13:01:31.041000+00:00 2020-03-10 10:17:07.174000+00:00 \n",
"1519 2020-05-04 18:55:27.062000+00:00 2020-08-04 14:52:03.330000+00:00 \n",
"2269 2015-02-08 18:57:35.978000+00:00 2020-07-20 07:50:58.740000+00:00 \n",
"6163 2015-06-01 08:43:00.352000+00:00 2017-06-19 07:44:32.649000+00:00 \n",
"... ... ... \n",
"10988461 2015-07-29 07:17:22.959000+00:00 2020-11-26 19:55:46.827000+00:00 \n",
"10988510 2015-07-09 23:19:08.782000+00:00 2020-12-08 14:01:30.548000+00:00 \n",
"10988922 2020-12-10 21:40:39.371000+00:00 2020-12-10 21:49:33.129000+00:00 \n",
"10988929 2020-11-24 13:18:00.296000+00:00 2020-11-24 13:21:10.985000+00:00 \n",
"10989073 2020-12-13 00:34:15.955000+00:00 2020-12-13 00:37:21.793000+00:00 \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"613 0 0 0 0 False NaN \n",
"1145 1 0 0 0 True NaN \n",
"1519 0 0 0 0 False NaN \n",
"2269 35 0 0 46 True NaN \n",
"6163 0 0 0 0 False NaN \n",
"... ... ... ... ... ... ... \n",
"10988461 0 0 0 0 False ugr.es \n",
"10988510 0 0 0 1 False NaN \n",
"10988922 0 0 0 0 False NaN \n",
"10988929 0 0 0 0 False NaN \n",
"10989073 0 0 0 0 False NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls \\\n",
"613 NaN [aestheticrecord.com] <NA> 1 \n",
"1145 NaN [ugr.es] <NA> 1 \n",
"1519 NaN NaN <NA> <NA> \n",
"2269 NaN [ugr.es] <NA> 1 \n",
"6163 NaN [ugr.es] <NA> 1 \n",
"... ... ... ... ... \n",
"10988461 NaN [ugr.es] <NA> 1 \n",
"10988510 NaN [uclm.es] <NA> 1 \n",
"10988922 NaN [zonneboilermagazijn.nl] <NA> 1 \n",
"10988929 NaN [google.com] <NA> 1 \n",
"10989073 NaN [139.59.245.36] <NA> 1 \n",
"\n",
" n_ids n_keywords n_education n_employment \\\n",
"613 <NA> <NA> <NA> <NA> \n",
"1145 <NA> <NA> <NA> 1 \n",
"1519 <NA> <NA> 2 3 \n",
"2269 1 <NA> 1 1 \n",
"6163 <NA> <NA> <NA> 1 \n",
"... ... ... ... ... \n",
"10988461 <NA> <NA> <NA> 1 \n",
"10988510 <NA> <NA> 1 1 \n",
"10988922 <NA> <NA> <NA> <NA> \n",
"10988929 <NA> <NA> <NA> <NA> \n",
"10989073 <NA> <NA> <NA> <NA> \n",
"\n",
" ext_works_source n_ext_work_source authoritative \\\n",
"613 NaN <NA> False \n",
"1145 [crossref] 1 True \n",
"1519 NaN <NA> False \n",
"2269 [researcherid, crossref] 2 True \n",
"6163 NaN <NA> False \n",
"... ... ... ... \n",
"10988461 NaN <NA> False \n",
"10988510 [mla international bibliography] 1 True \n",
"10988922 NaN <NA> False \n",
"10988929 NaN <NA> False \n",
"10989073 NaN <NA> False \n",
"\n",
" n_valid_education n_valid_employment \n",
"613 NaN NaN \n",
"1145 NaN 0.0 \n",
"1519 2.0 2.0 \n",
"2269 1.0 0.0 \n",
"6163 NaN 0.0 \n",
"... ... ... \n",
"10988461 NaN 0.0 \n",
"10988510 1.0 1.0 \n",
"10988922 NaN NaN \n",
"10988929 NaN NaN \n",
"10989073 NaN NaN \n",
"\n",
"[19571 rows x 35 columns]"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df.biography.notna()) & (df.biography.duplicated(keep=False))]"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" <tr>\n",
" <th>biography</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>car title loans are a more straightforward way to borrow the money you need, without dealing with the hassles of a traditional bank loan. because they use the equity value of your qualifying vehicle to secure funding, they are a great borrowing option for customers with credit issues, or who need expedited funding. our customers know to turn to our local experts when they need auto title loans. our team is excited to get you your loan today!</th>\n",
" <td>343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hi, how are you? it is really cool to find an entire community of people interested in the same thing you are.</th>\n",
" <td>229</td>\n",
" </tr>\n",
" <tr>\n",
" <th>the sound and the fury is one of my all-time favorite novels but i have many.</th>\n",
" <td>218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>悪意に満ちたバイアス偏った記事がいまなお健在だという好例の記事を見つけた。知識層が最も好むとされる大手新聞8月24日付)の朝刊記事だ。グリホサートという除草剤が発がん性や胎児への影響をもたらすと指摘する記事だが、先進国の公的機関は明確に否定している。こういう記事が続く限り、活字メディアはいよいよ専門家から見放されるだろうとの思いを強くする。</th>\n",
" <td>137</td>\n",
" </tr>\n",
" <tr>\n",
" <th>one of my passions is people watching but i dont get to do it as much as i would like.</th>\n",
" <td>132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>environmental engineering</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>insaat kalip yagi, kalip yag, plywood kalip yagi, ahsap kalip yagi alanlarinda profesyonel ve organik olarak imalat yapan sirketimiz musteri goruslerini son derece onemsemektedir.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>&lt;p class=p__7&gt;since life and medical insurance commissions are front-loaded, agents generally do not get a commission after the 3rd policy renewal.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>professor of otolaryngology</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ive traveled to several countries and have several more to see. i have a lizard named tinky.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2563 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid\n",
"biography \n",
"car title loans are a more straightforward way ... 343\n",
"hi, how are you? it is really cool to find an e... 229\n",
"the sound and the fury is one of my all-time fa... 218\n",
"悪意に満ちたバイアス(偏った)記事がいまなお健在だという好例の記事を見つけた。知識層が最も好む... 137\n",
"one of my passions is people watching but i don... 132\n",
"... ...\n",
"environmental engineering 2\n",
"insaat kalip yagi, kalip yag, plywood kalip yag... 2\n",
"<p class=p__7>since life and medical insurance ... 2\n",
"professor of otolaryngology 2\n",
"ive traveled to several countries and have seve... 2\n",
"\n",
"[2563 rows x 1 columns]"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_bios = df[['orcid', 'biography']].groupby('biography').count().sort_values('orcid', ascending=False)\n",
"dup_bios = dup_bios[dup_bios.orcid > 1]\n",
"dup_bios"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"orcid 19571\n",
"dtype: int64"
]
},
"execution_count": 83,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_bios.sum()"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"# dup_bios.to_csv('../data/processed/dup_bios.csv', index=True, columns=[], header=False)\n",
"dup_bios.to_csv('../data/processed/dup_bios.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I noticed that some bios can be found on google in other (probably fake) accounts.\n",
"E.g. \"hi, how are you? it is really cool to find an entire community of people interested in the same thing you are.\"\n",
"can be found on https://dribbble.com/camrodoabh/about"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Dup bios URLs**"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's plot the domains dup bios point to"
]
},
{
"cell_type": "code",
"execution_count": 132,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"image-perth.org",
"doodlekit.com",
"bolasportsbo.com",
"penzu.com",
"google.com",
"tumblr.com",
"diigo.com",
"skyrock.com",
"lucialpiazzale.com",
"wordpress.com",
"yousher.com",
"theglensecret.com",
"wpsuo.com",
"jigsy.com",
"iamarrows.com",
"hatenablog.com",
"gumroad.com",
"tearosediner.net",
"nation2.com",
"edublogs.org",
"postheaven.net",
"theburnward.com",
"almoheet-travel.com",
"zenwriting.net",
"writeablog.net",
"blogfreely.net",
"marijuanaforsalecheap.com",
"amazonaws.com",
"sinhly16.net",
"shutterfly.com",
"uberant.com",
"wonderwall.bar",
"wixsite.com",
"telegra.ph",
"mnetworks.dk",
"topsitenet.com",
"channeldoubler.co.uk",
"bt21fans.com",
"buymarijuanaonline.co.uk",
"trademama.com",
"timeforchangecounselling.com",
"qualtrics.com",
"site123.me",
"postach.io",
"simplesite.com",
"potenzmittel-online-bestellen.com",
"shikshadarpan.com",
"semillasfeminizadas.com.es",
"yalieswriters.us",
"yingjiesheng.com"
],
"y": [
6,
6,
6,
5,
5,
5,
5,
4,
4,
4,
4,
4,
4,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "URL distribution for bio \"really cool to find an entire community of people\""
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"598bbaeb-effa-4943-9993-93e13191411e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"598bbaeb-effa-4943-9993-93e13191411e\")) { Plotly.newPlot( \"598bbaeb-effa-4943-9993-93e13191411e\", [{\"type\": \"bar\", \"x\": [\"image-perth.org\", \"doodlekit.com\", \"bolasportsbo.com\", \"penzu.com\", \"google.com\", \"tumblr.com\", \"diigo.com\", \"skyrock.com\", \"lucialpiazzale.com\", \"wordpress.com\", \"yousher.com\", \"theglensecret.com\", \"wpsuo.com\", \"jigsy.com\", \"iamarrows.com\", \"hatenablog.com\", \"gumroad.com\", \"tearosediner.net\", \"nation2.com\", \"edublogs.org\", \"postheaven.net\", \"theburnward.com\", \"almoheet-travel.com\", \"zenwriting.net\", \"writeablog.net\", \"blogfreely.net\", \"marijuanaforsalecheap.com\", \"amazonaws.com\", \"sinhly16.net\", \"shutterfly.com\", \"uberant.com\", \"wonderwall.bar\", \"wixsite.com\", \"telegra.ph\", \"mnetworks.dk\", \"topsitenet.com\", \"channeldoubler.co.uk\", \"bt21fans.com\", \"buymarijuanaonline.co.uk\", \"trademama.com\", \"timeforchangecounselling.com\", \"qualtrics.com\", \"site123.me\", \"postach.io\", \"simplesite.com\", \"potenzmittel-online-bestellen.com\", \"shikshadarpan.com\", \"semillasfeminizadas.com.es\", \"yalieswriters.us\", \"yingjiesheng.com\"], \"y\": [6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": 
\"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], 
[0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, 
\"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", 
\"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": 
{\"text\": \"URL distribution for bio \\\"really cool to find an entire community of people\\\"\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('598bbaeb-effa-4943-9993-93e13191411e');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Literal text shared by the group of duplicated biographies inspected in this cell.\n",
"BIO_SNIPPET = 'really cool to find an entire community of people'\n",
"\n",
"# Select the profiles whose biography contains the snippet, emit one row per\n",
"# entry of `url_domains`, then count rows per domain (most frequent first).\n",
"# regex=False: the snippet is plain text, so it must not be parsed as a pattern.\n",
"# na=False: a missing biography is a non-match, which keeps the mask strictly boolean.\n",
"dup_bios_df = (\n",
"    df[df.biography.str.contains(BIO_SNIPPET, regex=False, na=False)]\n",
"    .explode('url_domains')\n",
"    .groupby('url_domains')[['orcid']]\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")\n",
"\n",
"# NOTE(review): set_top_n is defined elsewhere in the notebook; presumably it sets\n",
"# the global TOP_N read below - confirm.\n",
"set_top_n(50)\n",
"data = [\n",
"    go.Bar(\n",
"        # Positional slice: the TOP_N most frequent domains and their row counts.\n",
"        x=dup_bios_df.iloc[:TOP_N].index,\n",
"        y=dup_bios_df.iloc[:TOP_N]['orcid']\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='URL distribution for bio \"%s\"' % BIO_SNIPPET,\n",
"    # Tilt the domain labels so the long names stay readable.\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Duplicate bios: dates**"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"histfunc": "count",
"type": "histogram",
"x": [
"2020-11-06T06:10:20.070000+00:00",
"2020-11-13T01:04:19.859000+00:00",
"2020-11-05T00:38:21.096000+00:00",
"2020-12-08T05:38:30.786000+00:00",
"2020-12-10T08:54:56.127000+00:00",
"2020-11-20T09:11:08.356000+00:00",
"2020-12-10T05:26:14.534000+00:00",
"2020-12-04T02:41:11.756000+00:00",
"2020-11-26T04:16:15.824000+00:00",
"2020-10-12T04:58:17.220000+00:00",
"2020-12-08T00:35:36.543000+00:00",
"2020-11-30T01:30:22.357000+00:00",
"2020-10-19T01:51:35.391000+00:00",
"2020-12-07T04:43:46.569000+00:00",
"2020-11-12T06:33:38.112000+00:00",
"2020-10-20T05:55:09.939000+00:00",
"2020-11-18T02:15:05.122000+00:00",
"2020-12-04T00:53:17.885000+00:00",
"2020-10-16T02:03:07.922000+00:00",
"2020-12-07T01:05:49.858000+00:00",
"2020-12-09T09:51:39.412000+00:00",
"2020-10-16T05:25:52.218000+00:00",
"2021-01-04T15:49:35.727000+00:00",
"2020-12-21T13:42:08.792000+00:00",
"2020-12-07T05:45:44.145000+00:00",
"2020-12-01T06:01:05.133000+00:00",
"2020-12-10T03:52:43.220000+00:00",
"2020-12-08T10:18:31.859000+00:00",
"2020-12-22T13:49:16.908000+00:00",
"2020-12-04T04:35:16.628000+00:00",
"2020-12-04T06:16:50.173000+00:00",
"2020-11-23T01:24:52.965000+00:00",
"2020-12-18T13:09:47.463000+00:00",
"2020-12-09T03:17:51.528000+00:00",
"2020-10-01T00:37:22.535000+00:00",
"2020-12-07T04:11:41.932000+00:00",
"2020-12-10T02:07:49.921000+00:00",
"2020-12-07T02:03:15.901000+00:00",
"2020-12-08T01:10:25.988000+00:00",
"2020-12-09T02:50:31.195000+00:00",
"2020-12-10T02:31:54.901000+00:00",
"2020-12-04T06:49:59.988000+00:00",
"2020-12-20T14:35:28.033000+00:00",
"2020-11-13T02:36:00.122000+00:00",
"2020-12-02T01:51:22.669000+00:00",
"2020-11-18T00:37:25.591000+00:00",
"2020-11-05T01:53:15.027000+00:00",
"2020-10-09T00:29:08.619000+00:00",
"2020-10-28T01:35:34.572000+00:00",
"2020-10-05T04:42:51.206000+00:00",
"2020-12-11T07:30:16.966000+00:00",
"2020-11-27T05:05:54.359000+00:00",
"2020-12-04T01:13:35.987000+00:00",
"2020-12-04T02:52:36.139000+00:00",
"2020-12-09T04:01:45.997000+00:00",
"2020-12-07T07:27:31.433000+00:00",
"2020-11-02T01:46:58.533000+00:00",
"2020-11-16T01:34:33.962000+00:00",
"2020-10-08T00:35:13.677000+00:00",
"2020-12-07T01:01:40.495000+00:00",
"2020-10-08T00:40:42.151000+00:00",
"2020-12-09T16:53:58.895000+00:00",
"2020-12-03T04:17:44.994000+00:00",
"2020-10-29T00:32:03.420000+00:00",
"2020-10-21T00:33:19.700000+00:00",
"2020-12-07T02:01:03.230000+00:00",
"2020-11-27T03:03:57.166000+00:00",
"2020-11-20T00:31:02.803000+00:00",
"2020-12-10T00:44:32.141000+00:00",
"2020-11-06T03:30:11.523000+00:00",
"2020-12-08T01:31:33.785000+00:00",
"2020-12-03T04:42:29.095000+00:00",
"2020-11-30T03:07:50.342000+00:00",
"2020-12-10T03:57:58.215000+00:00",
"2020-12-08T05:30:35.820000+00:00",
"2020-10-19T01:48:20.788000+00:00",
"2020-12-10T02:03:52.708000+00:00",
"2020-11-02T00:49:49.169000+00:00",
"2020-10-23T02:48:08.535000+00:00",
"2020-12-04T07:08:18.268000+00:00",
"2020-12-09T05:49:49.132000+00:00",
"2020-11-19T04:33:54.243000+00:00",
"2020-11-20T08:31:18.007000+00:00",
"2020-12-24T09:34:06.934000+00:00",
"2020-11-12T05:14:22.824000+00:00",
"2020-10-01T02:31:37.319000+00:00",
"2020-12-03T06:09:53.307000+00:00",
"2020-12-08T06:22:48.990000+00:00",
"2020-10-02T00:35:57.966000+00:00",
"2020-10-23T01:46:33.848000+00:00",
"2020-12-11T00:42:31.593000+00:00",
"2020-12-08T01:58:02.205000+00:00",
"2020-12-04T07:31:58.866000+00:00",
"2020-10-23T07:14:02.472000+00:00",
"2020-12-09T00:36:57.747000+00:00",
"2020-11-23T02:12:15.138000+00:00",
"2020-12-01T05:56:25.276000+00:00",
"2020-10-19T00:36:24.880000+00:00",
"2020-10-01T02:14:30.144000+00:00",
"2020-10-06T04:09:04.132000+00:00",
"2020-11-04T07:11:18.875000+00:00",
"2020-12-07T02:01:23.937000+00:00",
"2020-11-25T00:29:34.048000+00:00",
"2020-10-20T04:34:33.072000+00:00",
"2020-12-07T09:28:58.928000+00:00",
"2020-12-10T06:14:55.197000+00:00",
"2020-11-24T01:59:39.104000+00:00",
"2020-12-10T10:30:17.368000+00:00",
"2020-12-04T03:52:11.968000+00:00",
"2020-11-03T01:36:12.954000+00:00",
"2020-12-08T20:44:30.147000+00:00",
"2020-11-30T02:53:09.083000+00:00",
"2020-10-23T07:15:19.894000+00:00",
"2020-10-12T01:51:02.464000+00:00",
"2020-11-03T00:35:56.866000+00:00",
"2020-11-02T03:00:22.222000+00:00",
"2020-12-08T07:56:01.181000+00:00",
"2020-11-09T01:54:10.680000+00:00",
"2020-12-09T02:20:15.699000+00:00",
"2020-12-10T03:39:00.227000+00:00",
"2020-12-08T06:04:10.336000+00:00",
"2020-12-09T05:14:08.617000+00:00",
"2020-12-23T09:36:08.353000+00:00",
"2020-10-15T05:07:24.672000+00:00",
"2020-12-10T00:37:21.558000+00:00",
"2020-10-20T03:04:21.659000+00:00",
"2020-12-08T07:53:40.113000+00:00",
"2020-10-07T03:03:36.133000+00:00",
"2020-10-28T01:58:35.294000+00:00",
"2020-11-26T00:29:40.992000+00:00",
"2020-11-17T07:17:47.484000+00:00",
"2020-12-02T02:29:02.429000+00:00",
"2020-10-29T05:26:54.912000+00:00",
"2020-12-02T22:59:57.295000+00:00",
"2020-10-13T04:45:07.662000+00:00",
"2020-12-18T17:12:04.145000+00:00",
"2020-10-15T02:09:30.964000+00:00",
"2020-11-13T08:00:46.351000+00:00",
"2020-12-01T06:14:27.962000+00:00",
"2020-12-10T07:02:35.739000+00:00",
"2020-10-26T00:26:09.410000+00:00",
"2020-11-04T00:58:53.122000+00:00",
"2020-10-26T06:21:04.196000+00:00",
"2020-12-10T07:07:19.553000+00:00",
"2020-11-25T00:49:47.126000+00:00",
"2020-11-18T04:22:49.488000+00:00",
"2020-11-27T01:33:55.500000+00:00",
"2020-12-22T09:45:55.961000+00:00",
"2020-10-26T01:29:08.608000+00:00",
"2020-12-08T02:45:05.088000+00:00",
"2020-10-20T01:49:48.227000+00:00",
"2020-12-08T01:05:07.944000+00:00",
"2020-12-09T07:20:54.278000+00:00",
"2020-10-12T00:32:18.880000+00:00",
"2020-11-26T02:04:06.845000+00:00",
"2020-11-27T00:42:50.071000+00:00",
"2020-12-09T00:42:15.741000+00:00",
"2020-10-13T01:33:56.576000+00:00",
"2020-12-09T00:55:26.653000+00:00",
"2020-12-02T00:34:06.686000+00:00",
"2020-11-25T04:24:02.933000+00:00",
"2020-10-20T00:35:19.784000+00:00",
"2020-12-08T07:18:00.879000+00:00",
"2020-10-05T05:31:24.831000+00:00",
"2020-11-26T06:10:56.539000+00:00",
"2020-10-16T00:59:50.730000+00:00",
"2020-10-05T00:44:54.638000+00:00",
"2020-11-24T03:32:10.726000+00:00",
"2020-10-29T02:14:36.912000+00:00",
"2020-10-28T04:32:29.960000+00:00",
"2020-12-03T02:27:55.773000+00:00",
"2020-10-13T03:09:38.953000+00:00",
"2020-11-03T00:32:33.060000+00:00",
"2020-11-23T00:28:42.098000+00:00",
"2020-10-05T07:23:56.871000+00:00",
"2020-12-08T07:41:30.994000+00:00",
"2020-10-13T01:53:14.768000+00:00",
"2020-11-19T07:33:12.511000+00:00",
"2020-11-19T02:22:58.970000+00:00",
"2020-12-03T01:02:12.893000+00:00",
"2020-12-10T09:19:53.009000+00:00",
"2020-12-03T01:57:58.432000+00:00",
"2020-11-12T01:53:16.920000+00:00",
"2020-12-10T05:08:26.115000+00:00",
"2020-12-03T03:19:39.822000+00:00",
"2020-11-25T06:51:58.033000+00:00",
"2020-11-03T06:11:51.922000+00:00",
"2020-10-27T04:51:49.250000+00:00",
"2020-12-01T07:40:33.026000+00:00",
"2020-11-24T00:39:57.593000+00:00",
"2020-12-11T07:22:37.229000+00:00",
"2020-11-06T01:49:14.557000+00:00",
"2020-12-09T00:46:22.474000+00:00",
"2020-10-09T00:07:27.401000+00:00",
"2020-10-21T04:56:50.213000+00:00",
"2020-12-09T00:38:12.761000+00:00",
"2020-10-19T03:09:40.210000+00:00",
"2020-12-08T07:42:26.163000+00:00",
"2020-12-09T04:41:45.743000+00:00",
"2020-12-10T00:38:34.572000+00:00",
"2020-10-06T04:23:17.821000+00:00",
"2020-10-01T04:41:04.450000+00:00",
"2020-10-29T01:59:03.954000+00:00",
"2020-12-08T03:35:22.720000+00:00",
"2020-12-10T02:17:41.809000+00:00",
"2020-11-23T07:45:27.508000+00:00",
"2020-12-09T07:32:59.237000+00:00",
"2020-11-16T04:30:45.959000+00:00",
"2020-10-26T02:57:43.771000+00:00",
"2020-12-10T02:57:23.162000+00:00",
"2020-11-27T04:06:45.649000+00:00",
"2020-12-08T00:20:19.678000+00:00",
"2020-10-15T01:44:03.511000+00:00",
"2020-10-14T01:42:51.911000+00:00",
"2020-10-30T00:42:28.296000+00:00",
"2020-10-26T00:37:39.903000+00:00",
"2020-10-26T05:10:04.027000+00:00",
"2020-10-14T01:46:26.356000+00:00",
"2020-10-05T06:03:47.887000+00:00",
"2020-10-22T03:10:26.276000+00:00",
"2020-12-08T05:41:34.740000+00:00",
"2020-12-07T06:59:30.009000+00:00",
"2020-10-06T05:41:06.471000+00:00",
"2020-11-27T02:12:42.102000+00:00",
"2020-12-08T01:53:47.265000+00:00",
"2020-10-27T05:22:02.063000+00:00",
"2020-12-08T04:12:44.753000+00:00",
"2020-10-13T00:37:29.576000+00:00",
"2020-12-02T03:46:17.210000+00:00",
"2020-12-08T00:33:43.254000+00:00",
"2020-12-09T03:34:43.573000+00:00",
"2020-10-07T00:40:13.702000+00:00",
"2020-12-10T00:21:20.069000+00:00",
"2020-12-03T18:40:50.995000+00:00",
"2020-12-09T15:11:47.332000+00:00",
"2020-12-26T05:24:05.862000+00:00",
"2020-10-06T00:43:30.492000+00:00",
"2020-11-02T05:03:21.859000+00:00",
"2020-10-09T03:10:19.759000+00:00",
"2020-12-10T06:32:56.543000+00:00",
"2020-10-14T00:31:14.753000+00:00",
"2020-10-15T03:07:59.357000+00:00",
"2020-10-09T02:05:42.975000+00:00",
"2020-11-10T04:06:00.132000+00:00",
"2020-11-19T06:10:05.185000+00:00",
"2020-10-07T04:57:02.723000+00:00",
"2020-11-10T05:20:38.400000+00:00",
"2020-12-01T03:16:28.604000+00:00",
"2020-12-11T00:22:57.205000+00:00",
"2020-12-08T00:40:31.488000+00:00",
"2020-12-09T03:57:54.146000+00:00",
"2020-11-02T07:45:42.838000+00:00",
"2020-10-16T03:43:52.975000+00:00",
"2020-12-07T02:34:45.335000+00:00",
"2020-12-10T05:26:59.077000+00:00",
"2020-10-06T02:51:09.557000+00:00",
"2020-12-03T17:36:44.267000+00:00",
"2020-12-09T07:15:59.846000+00:00",
"2020-12-09T04:20:24.180000+00:00",
"2020-10-12T00:29:41.477000+00:00",
"2020-10-08T03:09:24.761000+00:00",
"2020-11-10T00:44:33.074000+00:00",
"2020-10-06T00:44:53.643000+00:00",
"2020-10-29T00:40:45.821000+00:00",
"2020-10-22T01:40:49.877000+00:00",
"2020-10-05T00:45:40.224000+00:00",
"2020-12-04T05:56:11.632000+00:00",
"2020-12-09T03:12:40.086000+00:00",
"2020-12-07T01:16:29.015000+00:00",
"2020-10-13T00:32:41.200000+00:00",
"2020-12-11T05:10:03.095000+00:00",
"2020-10-14T04:47:45.211000+00:00",
"2020-11-09T03:37:34.513000+00:00",
"2020-11-09T00:41:45.173000+00:00",
"2020-12-08T01:50:10.568000+00:00",
"2020-11-24T01:51:28.207000+00:00",
"2020-12-10T01:52:37.083000+00:00",
"2020-12-22T11:47:29.012000+00:00",
"2020-10-01T01:14:36.461000+00:00",
"2020-12-07T07:24:21.357000+00:00",
"2020-11-05T04:56:52.777000+00:00",
"2020-12-11T05:15:34.655000+00:00",
"2020-11-05T03:01:48.301000+00:00",
"2020-11-11T03:56:13.111000+00:00",
"2020-12-09T06:11:25.359000+00:00",
"2020-12-09T04:28:58.267000+00:00",
"2020-10-30T07:19:59.994000+00:00",
"2020-10-07T06:09:58.118000+00:00",
"2020-11-04T02:32:20.006000+00:00",
"2020-12-08T00:34:59.437000+00:00",
"2020-12-10T03:32:11.013000+00:00",
"2020-10-27T02:55:23.288000+00:00",
"2020-11-06T01:46:33.352000+00:00",
"2020-12-07T02:28:40.834000+00:00",
"2020-11-04T07:18:51.293000+00:00",
"2020-10-08T04:37:16.253000+00:00",
"2020-12-09T08:26:09.172000+00:00",
"2020-12-09T06:41:51.112000+00:00",
"2020-11-03T02:49:25.793000+00:00",
"2020-11-06T00:38:50.208000+00:00",
"2020-10-21T01:39:42.463000+00:00",
"2020-12-08T02:04:35.965000+00:00",
"2020-12-27T12:39:28.524000+00:00",
"2020-11-20T05:49:25.708000+00:00",
"2020-12-04T06:25:21.013000+00:00",
"2020-11-16T00:26:54.233000+00:00",
"2020-11-03T04:40:55.485000+00:00",
"2020-11-25T08:24:26.620000+00:00",
"2020-10-28T00:33:46.915000+00:00",
"2020-12-10T06:01:53.737000+00:00",
"2020-11-04T05:13:14.840000+00:00",
"2020-11-18T05:44:57.361000+00:00",
"2020-12-07T00:36:46.723000+00:00",
"2020-11-16T02:42:38.500000+00:00",
"2020-11-30T00:27:02.900000+00:00",
"2020-12-10T02:04:37.554000+00:00",
"2020-12-09T02:02:01.646000+00:00",
"2020-12-07T03:42:54.669000+00:00",
"2020-12-27T03:56:52.049000+00:00",
"2020-11-09T00:38:44.818000+00:00",
"2020-12-07T06:47:43.421000+00:00",
"2020-12-10T04:13:00.551000+00:00",
"2020-12-26T15:17:09.480000+00:00",
"2020-11-05T02:09:35.869000+00:00",
"2020-12-08T02:01:25.880000+00:00",
"2020-11-19T00:47:12.305000+00:00",
"2020-10-09T01:56:49.349000+00:00",
"2020-10-15T00:32:01.688000+00:00",
"2020-10-27T02:41:01.334000+00:00",
"2020-11-18T02:12:25.671000+00:00",
"2020-10-07T04:58:13.870000+00:00",
"2020-12-07T02:20:06.535000+00:00",
"2020-10-12T03:10:36.308000+00:00",
"2020-10-29T03:18:20.673000+00:00",
"2020-12-09T08:06:54.526000+00:00",
"2020-11-12T03:17:05.140000+00:00",
"2020-10-01T01:15:09.993000+00:00",
"2020-11-11T00:51:14.004000+00:00",
"2020-11-26T01:43:11.269000+00:00",
"2020-11-24T00:43:00.335000+00:00",
"2020-11-30T04:44:39.496000+00:00",
"2020-10-23T00:30:18.009000+00:00",
"2020-10-16T02:12:55.314000+00:00",
"2020-12-02T06:24:05.665000+00:00",
"2021-01-20T10:24:17.206000+00:00",
"2020-11-05T00:36:01.999000+00:00",
"2020-12-02T02:41:16.199000+00:00",
"2020-12-03T00:30:22.897000+00:00",
"2020-12-10T03:39:20.103000+00:00",
"2020-11-11T02:04:59.354000+00:00",
"2020-12-08T06:38:19.302000+00:00",
"2020-12-01T00:35:19.625000+00:00",
"2020-11-11T05:45:47.110000+00:00",
"2020-10-28T00:35:44.890000+00:00",
"2020-12-07T04:16:41.797000+00:00",
"2020-11-26T02:16:11.427000+00:00",
"2020-11-23T02:32:21.420000+00:00",
"2020-11-30T03:20:48.098000+00:00",
"2020-12-09T00:51:34.359000+00:00",
"2020-12-21T15:37:42.646000+00:00",
"2020-12-04T02:14:35.901000+00:00",
"2020-12-04T04:47:02.247000+00:00",
"2020-10-07T00:37:11.738000+00:00",
"2020-11-10T01:59:01.119000+00:00",
"2020-10-02T02:48:15.987000+00:00",
"2020-10-22T01:52:32.093000+00:00",
"2020-11-23T00:21:54.782000+00:00",
"2020-12-08T06:25:58.310000+00:00",
"2020-10-02T01:45:44.168000+00:00",
"2020-11-09T02:00:09.022000+00:00",
"2020-11-30T01:55:34.221000+00:00",
"2020-12-07T00:32:11.199000+00:00",
"2020-12-06T17:32:11.180000+00:00",
"2020-11-13T05:32:08.198000+00:00",
"2020-12-08T07:42:30.666000+00:00",
"2020-10-30T00:41:50.497000+00:00",
"2020-10-21T00:36:51.459000+00:00",
"2020-10-22T00:30:26.343000+00:00",
"2020-10-14T00:33:42.972000+00:00",
"2020-11-26T02:58:59.509000+00:00",
"2020-10-08T04:50:07.561000+00:00",
"2020-10-21T02:47:27.539000+00:00",
"2020-11-12T00:38:10.297000+00:00",
"2020-11-20T00:59:49.538000+00:00",
"2020-12-09T01:59:11.916000+00:00",
"2020-12-11T04:13:54.427000+00:00",
"2020-10-02T03:18:52.565000+00:00",
"2020-12-07T04:29:04.602000+00:00",
"2020-12-06T18:46:28.549000+00:00",
"2020-10-19T00:30:26.334000+00:00",
"2020-12-10T02:10:10.657000+00:00",
"2020-12-08T00:49:04.595000+00:00",
"2020-10-14T02:59:56.059000+00:00",
"2020-10-27T03:50:08.036000+00:00",
"2020-11-25T06:06:07.910000+00:00",
"2020-10-21T02:17:39.198000+00:00",
"2020-10-02T01:44:17.197000+00:00",
"2020-12-08T05:04:35.419000+00:00",
"2020-10-23T00:25:01.106000+00:00",
"2020-12-10T01:37:48.987000+00:00",
"2020-10-08T05:35:12.525000+00:00",
"2020-10-28T02:41:42.671000+00:00",
"2020-10-22T00:32:37.607000+00:00",
"2020-10-27T06:18:42.715000+00:00",
"2020-11-27T02:22:36.165000+00:00",
"2020-12-20T12:04:18.314000+00:00",
"2020-11-24T05:33:20.884000+00:00",
"2020-12-10T00:32:51.611000+00:00",
"2020-10-15T00:30:54.576000+00:00",
"2020-10-19T05:15:33.712000+00:00",
"2020-11-06T00:34:06.186000+00:00",
"2020-10-02T00:34:50.494000+00:00",
"2020-11-25T01:47:57.398000+00:00",
"2020-12-26T13:27:41.589000+00:00",
"2020-10-22T06:11:02.945000+00:00",
"2020-12-03T02:00:33.684000+00:00",
"2020-11-18T00:39:17.492000+00:00",
"2020-11-19T00:31:54.080000+00:00",
"2020-10-16T00:32:26.207000+00:00"
],
"xbins": {
"size": "D1"
},
"y": [
"0000-0002-7397-7977",
"0000-0003-4931-9736",
"0000-0001-8221-2303",
"0000-0001-6736-072X",
"0000-0002-8727-1246",
"0000-0001-6760-9521",
"0000-0001-9283-9441",
"0000-0002-4732-4729",
"0000-0002-9827-9374",
"0000-0002-6834-0023",
"0000-0002-2002-1963",
"0000-0002-6761-8230",
"0000-0003-2879-0537",
"0000-0002-8132-9689",
"0000-0002-8780-3628",
"0000-0002-1638-362X",
"0000-0002-2288-2476",
"0000-0003-1668-4649",
"0000-0002-0274-0892",
"0000-0002-2226-8564",
"0000-0002-4644-3793",
"0000-0002-1450-0757",
"0000-0003-0934-7898",
"0000-0001-8857-1227",
"0000-0001-7391-5859",
"0000-0001-7855-7181",
"0000-0003-2092-5417",
"0000-0003-2802-4779",
"0000-0003-4446-8089",
"0000-0002-4169-2694",
"0000-0003-3544-8879",
"0000-0003-4183-5576",
"0000-0002-8715-8892",
"0000-0003-2084-7316",
"0000-0002-9104-1662",
"0000-0002-4643-1215",
"0000-0003-3672-4863",
"0000-0003-3822-4088",
"0000-0001-8056-2510",
"0000-0003-0116-611X",
"0000-0002-7866-9236",
"0000-0002-9483-7383",
"0000-0001-6180-8810",
"0000-0003-0815-2327",
"0000-0001-8577-7780",
"0000-0001-9143-4151",
"0000-0003-2213-5611",
"0000-0003-4137-5282",
"0000-0001-8543-9183",
"0000-0002-1346-7860",
"0000-0001-9141-0715",
"0000-0002-8923-182X",
"0000-0001-5466-8100",
"0000-0002-4847-5422",
"0000-0002-8769-5698",
"0000-0003-4107-9766",
"0000-0002-6027-4105",
"0000-0001-5782-8739",
"0000-0002-5911-6433",
"0000-0001-8021-0961",
"0000-0001-6247-4330",
"0000-0002-4491-1901",
"0000-0002-7736-940X",
"0000-0001-6119-7669",
"0000-0003-3877-5373",
"0000-0002-8929-7212",
"0000-0002-2459-2675",
"0000-0002-8156-5059",
"0000-0002-7114-5886",
"0000-0001-5097-977X",
"0000-0003-3138-197X",
"0000-0003-4083-2496",
"0000-0002-0338-3890",
"0000-0001-9872-5998",
"0000-0003-1091-0852",
"0000-0002-8561-142X",
"0000-0002-6052-6368",
"0000-0002-2862-2552",
"0000-0003-1164-9246",
"0000-0002-0726-7555",
"0000-0002-7800-1463",
"0000-0003-4868-5507",
"0000-0002-2049-316X",
"0000-0002-1295-2055",
"0000-0001-9281-8579",
"0000-0002-7810-3574",
"0000-0002-4910-1078",
"0000-0002-7877-8643",
"0000-0002-4809-8129",
"0000-0003-0739-261X",
"0000-0002-4555-6171",
"0000-0002-4828-8969",
"0000-0001-7065-7567",
"0000-0002-1043-5679",
"0000-0001-8942-822X",
"0000-0002-6106-9883",
"0000-0003-4561-1406",
"0000-0001-8834-2336",
"0000-0001-9293-2603",
"0000-0002-2299-2931",
"0000-0003-0529-408X",
"0000-0001-6040-4697",
"0000-0002-1772-6567",
"0000-0002-7328-7845",
"0000-0002-3856-3242",
"0000-0003-2768-672X",
"0000-0002-2564-7148",
"0000-0003-3095-4430",
"0000-0003-4298-9059",
"0000-0002-1322-595X",
"0000-0002-6346-5062",
"0000-0002-7684-1346",
"0000-0002-0837-7668",
"0000-0002-8547-0647",
"0000-0001-9393-1805",
"0000-0002-9071-6023",
"0000-0002-5953-3958",
"0000-0002-1698-5831",
"0000-0002-1338-9604",
"0000-0002-9586-0999",
"0000-0003-1137-9039",
"0000-0003-3480-0367",
"0000-0002-1491-2583",
"0000-0002-7894-3856",
"0000-0003-4977-7817",
"0000-0002-3459-7437",
"0000-0002-3725-5483",
"0000-0002-5538-8140",
"0000-0002-6151-3200",
"0000-0002-7630-5682",
"0000-0002-7681-0021",
"0000-0003-4306-9019",
"0000-0001-5546-8688",
"0000-0003-4431-5437",
"0000-0001-5968-1718",
"0000-0002-4091-7791",
"0000-0002-4137-7205",
"0000-0003-0572-8757",
"0000-0002-4864-0758",
"0000-0003-0756-5509",
"0000-0003-4096-1634",
"0000-0002-2413-4439",
"0000-0002-4679-0998",
"0000-0002-4645-269X",
"0000-0002-5922-3001",
"0000-0001-7256-5228",
"0000-0002-5105-8515",
"0000-0003-0098-7936",
"0000-0003-1866-3411",
"0000-0001-6111-5547",
"0000-0003-3525-3940",
"0000-0002-3644-3503",
"0000-0002-6028-7627",
"0000-0003-3175-5572",
"0000-0002-8287-7222",
"0000-0003-1595-1389",
"0000-0001-8377-8843",
"0000-0002-6968-6931",
"0000-0001-9338-4573",
"0000-0001-9504-9306",
"0000-0003-0254-7141",
"0000-0001-8275-9325",
"0000-0002-0702-1256",
"0000-0003-4922-7292",
"0000-0002-4815-8953",
"0000-0003-4222-2069",
"0000-0002-7586-5477",
"0000-0001-7531-6702",
"0000-0002-1764-6137",
"0000-0002-1936-2859",
"0000-0002-1110-0694",
"0000-0002-4580-1528",
"0000-0003-2354-3537",
"0000-0002-1112-0425",
"0000-0003-0226-9700",
"0000-0002-0750-4004",
"0000-0003-0798-5503",
"0000-0002-5724-3092",
"0000-0002-8037-1148",
"0000-0002-9075-6957",
"0000-0003-0273-3948",
"0000-0001-8105-0541",
"0000-0001-5240-8946",
"0000-0002-1255-6722",
"0000-0002-2248-9076",
"0000-0002-2482-3031",
"0000-0001-6610-4716",
"0000-0002-2679-385X",
"0000-0001-7351-8749",
"0000-0003-1870-2011",
"0000-0002-5780-1605",
"0000-0001-9548-6661",
"0000-0001-7956-0921",
"0000-0001-6077-1083",
"0000-0002-4312-9694",
"0000-0003-2726-990X",
"0000-0002-2310-0990",
"0000-0003-1218-6052",
"0000-0003-4137-9291",
"0000-0002-7546-3240",
"0000-0001-9632-6459",
"0000-0002-0317-7042",
"0000-0002-7785-451X",
"0000-0001-5329-7467",
"0000-0002-1872-3667",
"0000-0003-1180-8753",
"0000-0003-0011-1520",
"0000-0002-7218-6057",
"0000-0003-0225-1324",
"0000-0001-8055-8992",
"0000-0002-3019-6791",
"0000-0003-2859-603X",
"0000-0001-9119-6026",
"0000-0002-0587-2233",
"0000-0002-9511-9754",
"0000-0003-1517-3898",
"0000-0003-3993-8541",
"0000-0003-0208-4394",
"0000-0003-2322-3859",
"0000-0002-5105-7504",
"0000-0002-6613-9166",
"0000-0001-5793-9375",
"0000-0002-8887-0646",
"0000-0001-5618-4820",
"0000-0001-8160-5658",
"0000-0003-4635-2733",
"0000-0002-4032-956X",
"0000-0001-8926-0922",
"0000-0002-4366-3395",
"0000-0001-6056-0964",
"0000-0002-3423-891X",
"0000-0002-1547-4204",
"0000-0002-4324-7121",
"0000-0001-7540-3339",
"0000-0001-8624-3027",
"0000-0002-3475-2388",
"0000-0001-5049-4721",
"0000-0002-0549-2812",
"0000-0003-4352-3234",
"0000-0002-0285-6940",
"0000-0001-5883-3337",
"0000-0001-7918-7071",
"0000-0003-0270-8849",
"0000-0002-1059-9753",
"0000-0002-7330-596X",
"0000-0001-8805-3681",
"0000-0003-0272-1883",
"0000-0002-3679-6886",
"0000-0003-3718-4774",
"0000-0002-5438-2716",
"0000-0002-4805-891X",
"0000-0003-1151-7112",
"0000-0002-1649-4661",
"0000-0003-2956-5278",
"0000-0002-8016-3111",
"0000-0003-2827-1227",
"0000-0003-2145-4717",
"0000-0003-2028-9510",
"0000-0003-4424-6881",
"0000-0001-7474-2988",
"0000-0002-2510-6501",
"0000-0003-3582-6294",
"0000-0002-1343-126X",
"0000-0002-4595-2588",
"0000-0002-9526-2876",
"0000-0003-1158-7057",
"0000-0002-3345-5154",
"0000-0002-8244-6924",
"0000-0003-4806-3951",
"0000-0002-7464-3688",
"0000-0003-2647-2359",
"0000-0001-6073-387X",
"0000-0001-8230-5606",
"0000-0001-9108-194X",
"0000-0002-1674-6427",
"0000-0002-6355-1382",
"0000-0003-3632-7673",
"0000-0002-5140-4932",
"0000-0003-2316-2347",
"0000-0003-2793-1027",
"0000-0002-8554-3738",
"0000-0001-6673-083X",
"0000-0001-7378-174X",
"0000-0002-1614-9163",
"0000-0002-8282-0330",
"0000-0003-3527-7336",
"0000-0001-7189-2147",
"0000-0002-3344-3903",
"0000-0003-1786-3489",
"0000-0001-7108-8036",
"0000-0003-4078-2193",
"0000-0001-9352-3676",
"0000-0001-9856-8531",
"0000-0002-9276-8003",
"0000-0003-0872-8082",
"0000-0002-7824-1395",
"0000-0001-5224-7353",
"0000-0002-5281-2110",
"0000-0002-3201-6256",
"0000-0003-3697-1370",
"0000-0002-7678-0520",
"0000-0002-3741-371X",
"0000-0002-8132-2356",
"0000-0001-8074-1025",
"0000-0002-7896-7268",
"0000-0002-5536-6005",
"0000-0002-5627-5594",
"0000-0002-2790-8196",
"0000-0002-4814-6303",
"0000-0003-3563-8111",
"0000-0002-3894-8185",
"0000-0001-6608-759X",
"0000-0001-6678-4133",
"0000-0001-6893-5775",
"0000-0002-9472-9307",
"0000-0003-2441-0736",
"0000-0002-8295-024X",
"0000-0002-4322-6590",
"0000-0001-9047-2156",
"0000-0002-9072-6328",
"0000-0002-1625-1774",
"0000-0002-2315-0242",
"0000-0001-6178-4337",
"0000-0003-1272-8819",
"0000-0003-1460-8458",
"0000-0003-4091-5443",
"0000-0001-6647-3570",
"0000-0002-0756-4381",
"0000-0002-7428-5307",
"0000-0002-7699-0579",
"0000-0002-3145-000X",
"0000-0002-9249-230X",
"0000-0002-0511-4600",
"0000-0003-1621-0435",
"0000-0002-6126-7150",
"0000-0002-8161-6469",
"0000-0002-9577-7263",
"0000-0003-4087-3992",
"0000-0002-1295-2450",
"0000-0002-5135-5341",
"0000-0002-7357-3104",
"0000-0002-2913-3400",
"0000-0003-0271-5678",
"0000-0002-3427-5032",
"0000-0002-2550-0496",
"0000-0001-6399-6536",
"0000-0001-7317-8000",
"0000-0002-0357-9789",
"0000-0002-8995-3531",
"0000-0003-4421-9234",
"0000-0002-6004-465X",
"0000-0002-8739-4473",
"0000-0002-5576-1865",
"0000-0002-6978-9068",
"0000-0001-9918-496X",
"0000-0003-2243-8460",
"0000-0003-2778-4266",
"0000-0003-4318-6821",
"0000-0002-7600-7845",
"0000-0002-8469-1288",
"0000-0003-4684-4148",
"0000-0002-7312-128X",
"0000-0001-5236-2001",
"0000-0001-8692-9651",
"0000-0002-8544-0202",
"0000-0002-8884-3592",
"0000-0002-1231-9903",
"0000-0003-0407-7913",
"0000-0001-7196-6679",
"0000-0002-1319-1358",
"0000-0001-7305-3240",
"0000-0002-9910-2765",
"0000-0003-4952-1491",
"0000-0002-7466-2256",
"0000-0002-9221-8730",
"0000-0001-9116-7882",
"0000-0002-8938-6004",
"0000-0002-8684-4321",
"0000-0003-0818-6607",
"0000-0001-6798-4401",
"0000-0001-6500-9047",
"0000-0002-2693-5627",
"0000-0002-7256-6340",
"0000-0002-0194-6362",
"0000-0002-5125-5092",
"0000-0002-7197-2355",
"0000-0002-4535-0958",
"0000-0001-6476-7321",
"0000-0003-3345-4757",
"0000-0003-4179-1155",
"0000-0002-5536-0156",
"0000-0002-6206-8444",
"0000-0001-6570-1594",
"0000-0002-9711-0067",
"0000-0001-5321-0392",
"0000-0003-2488-1263",
"0000-0003-2781-8198",
"0000-0002-3739-6681",
"0000-0003-3577-7963",
"0000-0003-3286-3073",
"0000-0001-8200-3333",
"0000-0002-5813-2765",
"0000-0002-6206-3563",
"0000-0003-3660-364X",
"0000-0002-4134-3566",
"0000-0001-7768-9799",
"0000-0001-5895-047X",
"0000-0002-4722-8681",
"0000-0001-9603-8908",
"0000-0002-0772-1586",
"0000-0001-6797-3964",
"0000-0002-6834-901X",
"0000-0002-1554-8306",
"0000-0002-1879-4262",
"0000-0002-9640-8136",
"0000-0002-6926-3752",
"0000-0002-3655-4713",
"0000-0002-8724-1020",
"0000-0002-4601-4569"
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Activation distribution for bio \"more straightforward way to borrow the money you\""
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"05a1aac8-0f09-4c92-9ab4-3f4da9a3eaa8\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"05a1aac8-0f09-4c92-9ab4-3f4da9a3eaa8\")) { Plotly.newPlot( \"05a1aac8-0f09-4c92-9ab4-3f4da9a3eaa8\", [{\"histfunc\": \"count\", \"type\": \"histogram\", \"x\": [\"2020-11-06T06:10:20.070000+00:00\", \"2020-11-13T01:04:19.859000+00:00\", \"2020-11-05T00:38:21.096000+00:00\", \"2020-12-08T05:38:30.786000+00:00\", \"2020-12-10T08:54:56.127000+00:00\", \"2020-11-20T09:11:08.356000+00:00\", \"2020-12-10T05:26:14.534000+00:00\", \"2020-12-04T02:41:11.756000+00:00\", \"2020-11-26T04:16:15.824000+00:00\", \"2020-10-12T04:58:17.220000+00:00\", \"2020-12-08T00:35:36.543000+00:00\", \"2020-11-30T01:30:22.357000+00:00\", \"2020-10-19T01:51:35.391000+00:00\", \"2020-12-07T04:43:46.569000+00:00\", \"2020-11-12T06:33:38.112000+00:00\", \"2020-10-20T05:55:09.939000+00:00\", \"2020-11-18T02:15:05.122000+00:00\", \"2020-12-04T00:53:17.885000+00:00\", \"2020-10-16T02:03:07.922000+00:00\", \"2020-12-07T01:05:49.858000+00:00\", \"2020-12-09T09:51:39.412000+00:00\", \"2020-10-16T05:25:52.218000+00:00\", \"2021-01-04T15:49:35.727000+00:00\", \"2020-12-21T13:42:08.792000+00:00\", \"2020-12-07T05:45:44.145000+00:00\", \"2020-12-01T06:01:05.133000+00:00\", \"2020-12-10T03:52:43.220000+00:00\", \"2020-12-08T10:18:31.859000+00:00\", \"2020-12-22T13:49:16.908000+00:00\", \"2020-12-04T04:35:16.628000+00:00\", \"2020-12-04T06:16:50.173000+00:00\", \"2020-11-23T01:24:52.965000+00:00\", \"2020-12-18T13:09:47.463000+00:00\", \"2020-12-09T03:17:51.528000+00:00\", \"2020-10-01T00:37:22.535000+00:00\", \"2020-12-07T04:11:41.932000+00:00\", \"2020-12-10T02:07:49.921000+00:00\", \"2020-12-07T02:03:15.901000+00:00\", \"2020-12-08T01:10:25.988000+00:00\", \"2020-12-09T02:50:31.195000+00:00\", \"2020-12-10T02:31:54.901000+00:00\", 
\"2020-12-04T06:49:59.988000+00:00\", \"2020-12-20T14:35:28.033000+00:00\", \"2020-11-13T02:36:00.122000+00:00\", \"2020-12-02T01:51:22.669000+00:00\", \"2020-11-18T00:37:25.591000+00:00\", \"2020-11-05T01:53:15.027000+00:00\", \"2020-10-09T00:29:08.619000+00:00\", \"2020-10-28T01:35:34.572000+00:00\", \"2020-10-05T04:42:51.206000+00:00\", \"2020-12-11T07:30:16.966000+00:00\", \"2020-11-27T05:05:54.359000+00:00\", \"2020-12-04T01:13:35.987000+00:00\", \"2020-12-04T02:52:36.139000+00:00\", \"2020-12-09T04:01:45.997000+00:00\", \"2020-12-07T07:27:31.433000+00:00\", \"2020-11-02T01:46:58.533000+00:00\", \"2020-11-16T01:34:33.962000+00:00\", \"2020-10-08T00:35:13.677000+00:00\", \"2020-12-07T01:01:40.495000+00:00\", \"2020-10-08T00:40:42.151000+00:00\", \"2020-12-09T16:53:58.895000+00:00\", \"2020-12-03T04:17:44.994000+00:00\", \"2020-10-29T00:32:03.420000+00:00\", \"2020-10-21T00:33:19.700000+00:00\", \"2020-12-07T02:01:03.230000+00:00\", \"2020-11-27T03:03:57.166000+00:00\", \"2020-11-20T00:31:02.803000+00:00\", \"2020-12-10T00:44:32.141000+00:00\", \"2020-11-06T03:30:11.523000+00:00\", \"2020-12-08T01:31:33.785000+00:00\", \"2020-12-03T04:42:29.095000+00:00\", \"2020-11-30T03:07:50.342000+00:00\", \"2020-12-10T03:57:58.215000+00:00\", \"2020-12-08T05:30:35.820000+00:00\", \"2020-10-19T01:48:20.788000+00:00\", \"2020-12-10T02:03:52.708000+00:00\", \"2020-11-02T00:49:49.169000+00:00\", \"2020-10-23T02:48:08.535000+00:00\", \"2020-12-04T07:08:18.268000+00:00\", \"2020-12-09T05:49:49.132000+00:00\", \"2020-11-19T04:33:54.243000+00:00\", \"2020-11-20T08:31:18.007000+00:00\", \"2020-12-24T09:34:06.934000+00:00\", \"2020-11-12T05:14:22.824000+00:00\", \"2020-10-01T02:31:37.319000+00:00\", \"2020-12-03T06:09:53.307000+00:00\", \"2020-12-08T06:22:48.990000+00:00\", \"2020-10-02T00:35:57.966000+00:00\", \"2020-10-23T01:46:33.848000+00:00\", \"2020-12-11T00:42:31.593000+00:00\", \"2020-12-08T01:58:02.205000+00:00\", \"2020-12-04T07:31:58.866000+00:00\", 
\"2020-10-23T07:14:02.472000+00:00\", \"2020-12-09T00:36:57.747000+00:00\", \"2020-11-23T02:12:15.138000+00:00\", \"2020-12-01T05:56:25.276000+00:00\", \"2020-10-19T00:36:24.880000+00:00\", \"2020-10-01T02:14:30.144000+00:00\", \"2020-10-06T04:09:04.132000+00:00\", \"2020-11-04T07:11:18.875000+00:00\", \"2020-12-07T02:01:23.937000+00:00\", \"2020-11-25T00:29:34.048000+00:00\", \"2020-10-20T04:34:33.072000+00:00\", \"2020-12-07T09:28:58.928000+00:00\", \"2020-12-10T06:14:55.197000+00:00\", \"2020-11-24T01:59:39.104000+00:00\", \"2020-12-10T10:30:17.368000+00:00\", \"2020-12-04T03:52:11.968000+00:00\", \"2020-11-03T01:36:12.954000+00:00\", \"2020-12-08T20:44:30.147000+00:00\", \"2020-11-30T02:53:09.083000+00:00\", \"2020-10-23T07:15:19.894000+00:00\", \"2020-10-12T01:51:02.464000+00:00\", \"2020-11-03T00:35:56.866000+00:00\", \"2020-11-02T03:00:22.222000+00:00\", \"2020-12-08T07:56:01.181000+00:00\", \"2020-11-09T01:54:10.680000+00:00\", \"2020-12-09T02:20:15.699000+00:00\", \"2020-12-10T03:39:00.227000+00:00\", \"2020-12-08T06:04:10.336000+00:00\", \"2020-12-09T05:14:08.617000+00:00\", \"2020-12-23T09:36:08.353000+00:00\", \"2020-10-15T05:07:24.672000+00:00\", \"2020-12-10T00:37:21.558000+00:00\", \"2020-10-20T03:04:21.659000+00:00\", \"2020-12-08T07:53:40.113000+00:00\", \"2020-10-07T03:03:36.133000+00:00\", \"2020-10-28T01:58:35.294000+00:00\", \"2020-11-26T00:29:40.992000+00:00\", \"2020-11-17T07:17:47.484000+00:00\", \"2020-12-02T02:29:02.429000+00:00\", \"2020-10-29T05:26:54.912000+00:00\", \"2020-12-02T22:59:57.295000+00:00\", \"2020-10-13T04:45:07.662000+00:00\", \"2020-12-18T17:12:04.145000+00:00\", \"2020-10-15T02:09:30.964000+00:00\", \"2020-11-13T08:00:46.351000+00:00\", \"2020-12-01T06:14:27.962000+00:00\", \"2020-12-10T07:02:35.739000+00:00\", \"2020-10-26T00:26:09.410000+00:00\", \"2020-11-04T00:58:53.122000+00:00\", \"2020-10-26T06:21:04.196000+00:00\", \"2020-12-10T07:07:19.553000+00:00\", \"2020-11-25T00:49:47.126000+00:00\", 
\"2020-11-18T04:22:49.488000+00:00\", \"2020-11-27T01:33:55.500000+00:00\", \"2020-12-22T09:45:55.961000+00:00\", \"2020-10-26T01:29:08.608000+00:00\", \"2020-12-08T02:45:05.088000+00:00\", \"2020-10-20T01:49:48.227000+00:00\", \"2020-12-08T01:05:07.944000+00:00\", \"2020-12-09T07:20:54.278000+00:00\", \"2020-10-12T00:32:18.880000+00:00\", \"2020-11-26T02:04:06.845000+00:00\", \"2020-11-27T00:42:50.071000+00:00\", \"2020-12-09T00:42:15.741000+00:00\", \"2020-10-13T01:33:56.576000+00:00\", \"2020-12-09T00:55:26.653000+00:00\", \"2020-12-02T00:34:06.686000+00:00\", \"2020-11-25T04:24:02.933000+00:00\", \"2020-10-20T00:35:19.784000+00:00\", \"2020-12-08T07:18:00.879000+00:00\", \"2020-10-05T05:31:24.831000+00:00\", \"2020-11-26T06:10:56.539000+00:00\", \"2020-10-16T00:59:50.730000+00:00\", \"2020-10-05T00:44:54.638000+00:00\", \"2020-11-24T03:32:10.726000+00:00\", \"2020-10-29T02:14:36.912000+00:00\", \"2020-10-28T04:32:29.960000+00:00\", \"2020-12-03T02:27:55.773000+00:00\", \"2020-10-13T03:09:38.953000+00:00\", \"2020-11-03T00:32:33.060000+00:00\", \"2020-11-23T00:28:42.098000+00:00\", \"2020-10-05T07:23:56.871000+00:00\", \"2020-12-08T07:41:30.994000+00:00\", \"2020-10-13T01:53:14.768000+00:00\", \"2020-11-19T07:33:12.511000+00:00\", \"2020-11-19T02:22:58.970000+00:00\", \"2020-12-03T01:02:12.893000+00:00\", \"2020-12-10T09:19:53.009000+00:00\", \"2020-12-03T01:57:58.432000+00:00\", \"2020-11-12T01:53:16.920000+00:00\", \"2020-12-10T05:08:26.115000+00:00\", \"2020-12-03T03:19:39.822000+00:00\", \"2020-11-25T06:51:58.033000+00:00\", \"2020-11-03T06:11:51.922000+00:00\", \"2020-10-27T04:51:49.250000+00:00\", \"2020-12-01T07:40:33.026000+00:00\", \"2020-11-24T00:39:57.593000+00:00\", \"2020-12-11T07:22:37.229000+00:00\", \"2020-11-06T01:49:14.557000+00:00\", \"2020-12-09T00:46:22.474000+00:00\", \"2020-10-09T00:07:27.401000+00:00\", \"2020-10-21T04:56:50.213000+00:00\", \"2020-12-09T00:38:12.761000+00:00\", \"2020-10-19T03:09:40.210000+00:00\", 
\"2020-12-08T07:42:26.163000+00:00\", \"2020-12-09T04:41:45.743000+00:00\", \"2020-12-10T00:38:34.572000+00:00\", \"2020-10-06T04:23:17.821000+00:00\", \"2020-10-01T04:41:04.450000+00:00\", \"2020-10-29T01:59:03.954000+00:00\", \"2020-12-08T03:35:22.720000+00:00\", \"2020-12-10T02:17:41.809000+00:00\", \"2020-11-23T07:45:27.508000+00:00\", \"2020-12-09T07:32:59.237000+00:00\", \"2020-11-16T04:30:45.959000+00:00\", \"2020-10-26T02:57:43.771000+00:00\", \"2020-12-10T02:57:23.162000+00:00\", \"2020-11-27T04:06:45.649000+00:00\", \"2020-12-08T00:20:19.678000+00:00\", \"2020-10-15T01:44:03.511000+00:00\", \"2020-10-14T01:42:51.911000+00:00\", \"2020-10-30T00:42:28.296000+00:00\", \"2020-10-26T00:37:39.903000+00:00\", \"2020-10-26T05:10:04.027000+00:00\", \"2020-10-14T01:46:26.356000+00:00\", \"2020-10-05T06:03:47.887000+00:00\", \"2020-10-22T03:10:26.276000+00:00\", \"2020-12-08T05:41:34.740000+00:00\", \"2020-12-07T06:59:30.009000+00:00\", \"2020-10-06T05:41:06.471000+00:00\", \"2020-11-27T02:12:42.102000+00:00\", \"2020-12-08T01:53:47.265000+00:00\", \"2020-10-27T05:22:02.063000+00:00\", \"2020-12-08T04:12:44.753000+00:00\", \"2020-10-13T00:37:29.576000+00:00\", \"2020-12-02T03:46:17.210000+00:00\", \"2020-12-08T00:33:43.254000+00:00\", \"2020-12-09T03:34:43.573000+00:00\", \"2020-10-07T00:40:13.702000+00:00\", \"2020-12-10T00:21:20.069000+00:00\", \"2020-12-03T18:40:50.995000+00:00\", \"2020-12-09T15:11:47.332000+00:00\", \"2020-12-26T05:24:05.862000+00:00\", \"2020-10-06T00:43:30.492000+00:00\", \"2020-11-02T05:03:21.859000+00:00\", \"2020-10-09T03:10:19.759000+00:00\", \"2020-12-10T06:32:56.543000+00:00\", \"2020-10-14T00:31:14.753000+00:00\", \"2020-10-15T03:07:59.357000+00:00\", \"2020-10-09T02:05:42.975000+00:00\", \"2020-11-10T04:06:00.132000+00:00\", \"2020-11-19T06:10:05.185000+00:00\", \"2020-10-07T04:57:02.723000+00:00\", \"2020-11-10T05:20:38.400000+00:00\", \"2020-12-01T03:16:28.604000+00:00\", \"2020-12-11T00:22:57.205000+00:00\", 
\"2020-12-08T00:40:31.488000+00:00\", \"2020-12-09T03:57:54.146000+00:00\", \"2020-11-02T07:45:42.838000+00:00\", \"2020-10-16T03:43:52.975000+00:00\", \"2020-12-07T02:34:45.335000+00:00\", \"2020-12-10T05:26:59.077000+00:00\", \"2020-10-06T02:51:09.557000+00:00\", \"2020-12-03T17:36:44.267000+00:00\", \"2020-12-09T07:15:59.846000+00:00\", \"2020-12-09T04:20:24.180000+00:00\", \"2020-10-12T00:29:41.477000+00:00\", \"2020-10-08T03:09:24.761000+00:00\", \"2020-11-10T00:44:33.074000+00:00\", \"2020-10-06T00:44:53.643000+00:00\", \"2020-10-29T00:40:45.821000+00:00\", \"2020-10-22T01:40:49.877000+00:00\", \"2020-10-05T00:45:40.224000+00:00\", \"2020-12-04T05:56:11.632000+00:00\", \"2020-12-09T03:12:40.086000+00:00\", \"2020-12-07T01:16:29.015000+00:00\", \"2020-10-13T00:32:41.200000+00:00\", \"2020-12-11T05:10:03.095000+00:00\", \"2020-10-14T04:47:45.211000+00:00\", \"2020-11-09T03:37:34.513000+00:00\", \"2020-11-09T00:41:45.173000+00:00\", \"2020-12-08T01:50:10.568000+00:00\", \"2020-11-24T01:51:28.207000+00:00\", \"2020-12-10T01:52:37.083000+00:00\", \"2020-12-22T11:47:29.012000+00:00\", \"2020-10-01T01:14:36.461000+00:00\", \"2020-12-07T07:24:21.357000+00:00\", \"2020-11-05T04:56:52.777000+00:00\", \"2020-12-11T05:15:34.655000+00:00\", \"2020-11-05T03:01:48.301000+00:00\", \"2020-11-11T03:56:13.111000+00:00\", \"2020-12-09T06:11:25.359000+00:00\", \"2020-12-09T04:28:58.267000+00:00\", \"2020-10-30T07:19:59.994000+00:00\", \"2020-10-07T06:09:58.118000+00:00\", \"2020-11-04T02:32:20.006000+00:00\", \"2020-12-08T00:34:59.437000+00:00\", \"2020-12-10T03:32:11.013000+00:00\", \"2020-10-27T02:55:23.288000+00:00\", \"2020-11-06T01:46:33.352000+00:00\", \"2020-12-07T02:28:40.834000+00:00\", \"2020-11-04T07:18:51.293000+00:00\", \"2020-10-08T04:37:16.253000+00:00\", \"2020-12-09T08:26:09.172000+00:00\", \"2020-12-09T06:41:51.112000+00:00\", \"2020-11-03T02:49:25.793000+00:00\", \"2020-11-06T00:38:50.208000+00:00\", \"2020-10-21T01:39:42.463000+00:00\", 
\"2020-12-08T02:04:35.965000+00:00\", \"2020-12-27T12:39:28.524000+00:00\", \"2020-11-20T05:49:25.708000+00:00\", \"2020-12-04T06:25:21.013000+00:00\", \"2020-11-16T00:26:54.233000+00:00\", \"2020-11-03T04:40:55.485000+00:00\", \"2020-11-25T08:24:26.620000+00:00\", \"2020-10-28T00:33:46.915000+00:00\", \"2020-12-10T06:01:53.737000+00:00\", \"2020-11-04T05:13:14.840000+00:00\", \"2020-11-18T05:44:57.361000+00:00\", \"2020-12-07T00:36:46.723000+00:00\", \"2020-11-16T02:42:38.500000+00:00\", \"2020-11-30T00:27:02.900000+00:00\", \"2020-12-10T02:04:37.554000+00:00\", \"2020-12-09T02:02:01.646000+00:00\", \"2020-12-07T03:42:54.669000+00:00\", \"2020-12-27T03:56:52.049000+00:00\", \"2020-11-09T00:38:44.818000+00:00\", \"2020-12-07T06:47:43.421000+00:00\", \"2020-12-10T04:13:00.551000+00:00\", \"2020-12-26T15:17:09.480000+00:00\", \"2020-11-05T02:09:35.869000+00:00\", \"2020-12-08T02:01:25.880000+00:00\", \"2020-11-19T00:47:12.305000+00:00\", \"2020-10-09T01:56:49.349000+00:00\", \"2020-10-15T00:32:01.688000+00:00\", \"2020-10-27T02:41:01.334000+00:00\", \"2020-11-18T02:12:25.671000+00:00\", \"2020-10-07T04:58:13.870000+00:00\", \"2020-12-07T02:20:06.535000+00:00\", \"2020-10-12T03:10:36.308000+00:00\", \"2020-10-29T03:18:20.673000+00:00\", \"2020-12-09T08:06:54.526000+00:00\", \"2020-11-12T03:17:05.140000+00:00\", \"2020-10-01T01:15:09.993000+00:00\", \"2020-11-11T00:51:14.004000+00:00\", \"2020-11-26T01:43:11.269000+00:00\", \"2020-11-24T00:43:00.335000+00:00\", \"2020-11-30T04:44:39.496000+00:00\", \"2020-10-23T00:30:18.009000+00:00\", \"2020-10-16T02:12:55.314000+00:00\", \"2020-12-02T06:24:05.665000+00:00\", \"2021-01-20T10:24:17.206000+00:00\", \"2020-11-05T00:36:01.999000+00:00\", \"2020-12-02T02:41:16.199000+00:00\", \"2020-12-03T00:30:22.897000+00:00\", \"2020-12-10T03:39:20.103000+00:00\", \"2020-11-11T02:04:59.354000+00:00\", \"2020-12-08T06:38:19.302000+00:00\", \"2020-12-01T00:35:19.625000+00:00\", \"2020-11-11T05:45:47.110000+00:00\", 
\"2020-10-28T00:35:44.890000+00:00\", \"2020-12-07T04:16:41.797000+00:00\", \"2020-11-26T02:16:11.427000+00:00\", \"2020-11-23T02:32:21.420000+00:00\", \"2020-11-30T03:20:48.098000+00:00\", \"2020-12-09T00:51:34.359000+00:00\", \"2020-12-21T15:37:42.646000+00:00\", \"2020-12-04T02:14:35.901000+00:00\", \"2020-12-04T04:47:02.247000+00:00\", \"2020-10-07T00:37:11.738000+00:00\", \"2020-11-10T01:59:01.119000+00:00\", \"2020-10-02T02:48:15.987000+00:00\", \"2020-10-22T01:52:32.093000+00:00\", \"2020-11-23T00:21:54.782000+00:00\", \"2020-12-08T06:25:58.310000+00:00\", \"2020-10-02T01:45:44.168000+00:00\", \"2020-11-09T02:00:09.022000+00:00\", \"2020-11-30T01:55:34.221000+00:00\", \"2020-12-07T00:32:11.199000+00:00\", \"2020-12-06T17:32:11.180000+00:00\", \"2020-11-13T05:32:08.198000+00:00\", \"2020-12-08T07:42:30.666000+00:00\", \"2020-10-30T00:41:50.497000+00:00\", \"2020-10-21T00:36:51.459000+00:00\", \"2020-10-22T00:30:26.343000+00:00\", \"2020-10-14T00:33:42.972000+00:00\", \"2020-11-26T02:58:59.509000+00:00\", \"2020-10-08T04:50:07.561000+00:00\", \"2020-10-21T02:47:27.539000+00:00\", \"2020-11-12T00:38:10.297000+00:00\", \"2020-11-20T00:59:49.538000+00:00\", \"2020-12-09T01:59:11.916000+00:00\", \"2020-12-11T04:13:54.427000+00:00\", \"2020-10-02T03:18:52.565000+00:00\", \"2020-12-07T04:29:04.602000+00:00\", \"2020-12-06T18:46:28.549000+00:00\", \"2020-10-19T00:30:26.334000+00:00\", \"2020-12-10T02:10:10.657000+00:00\", \"2020-12-08T00:49:04.595000+00:00\", \"2020-10-14T02:59:56.059000+00:00\", \"2020-10-27T03:50:08.036000+00:00\", \"2020-11-25T06:06:07.910000+00:00\", \"2020-10-21T02:17:39.198000+00:00\", \"2020-10-02T01:44:17.197000+00:00\", \"2020-12-08T05:04:35.419000+00:00\", \"2020-10-23T00:25:01.106000+00:00\", \"2020-12-10T01:37:48.987000+00:00\", \"2020-10-08T05:35:12.525000+00:00\", \"2020-10-28T02:41:42.671000+00:00\", \"2020-10-22T00:32:37.607000+00:00\", \"2020-10-27T06:18:42.715000+00:00\", \"2020-11-27T02:22:36.165000+00:00\", 
\"2020-12-20T12:04:18.314000+00:00\", \"2020-11-24T05:33:20.884000+00:00\", \"2020-12-10T00:32:51.611000+00:00\", \"2020-10-15T00:30:54.576000+00:00\", \"2020-10-19T05:15:33.712000+00:00\", \"2020-11-06T00:34:06.186000+00:00\", \"2020-10-02T00:34:50.494000+00:00\", \"2020-11-25T01:47:57.398000+00:00\", \"2020-12-26T13:27:41.589000+00:00\", \"2020-10-22T06:11:02.945000+00:00\", \"2020-12-03T02:00:33.684000+00:00\", \"2020-11-18T00:39:17.492000+00:00\", \"2020-11-19T00:31:54.080000+00:00\", \"2020-10-16T00:32:26.207000+00:00\"], \"xbins\": {\"size\": \"D1\"}, \"y\": [\"0000-0002-7397-7977\", \"0000-0003-4931-9736\", \"0000-0001-8221-2303\", \"0000-0001-6736-072X\", \"0000-0002-8727-1246\", \"0000-0001-6760-9521\", \"0000-0001-9283-9441\", \"0000-0002-4732-4729\", \"0000-0002-9827-9374\", \"0000-0002-6834-0023\", \"0000-0002-2002-1963\", \"0000-0002-6761-8230\", \"0000-0003-2879-0537\", \"0000-0002-8132-9689\", \"0000-0002-8780-3628\", \"0000-0002-1638-362X\", \"0000-0002-2288-2476\", \"0000-0003-1668-4649\", \"0000-0002-0274-0892\", \"0000-0002-2226-8564\", \"0000-0002-4644-3793\", \"0000-0002-1450-0757\", \"0000-0003-0934-7898\", \"0000-0001-8857-1227\", \"0000-0001-7391-5859\", \"0000-0001-7855-7181\", \"0000-0003-2092-5417\", \"0000-0003-2802-4779\", \"0000-0003-4446-8089\", \"0000-0002-4169-2694\", \"0000-0003-3544-8879\", \"0000-0003-4183-5576\", \"0000-0002-8715-8892\", \"0000-0003-2084-7316\", \"0000-0002-9104-1662\", \"0000-0002-4643-1215\", \"0000-0003-3672-4863\", \"0000-0003-3822-4088\", \"0000-0001-8056-2510\", \"0000-0003-0116-611X\", \"0000-0002-7866-9236\", \"0000-0002-9483-7383\", \"0000-0001-6180-8810\", \"0000-0003-0815-2327\", \"0000-0001-8577-7780\", \"0000-0001-9143-4151\", \"0000-0003-2213-5611\", \"0000-0003-4137-5282\", \"0000-0001-8543-9183\", \"0000-0002-1346-7860\", \"0000-0001-9141-0715\", \"0000-0002-8923-182X\", \"0000-0001-5466-8100\", \"0000-0002-4847-5422\", \"0000-0002-8769-5698\", \"0000-0003-4107-9766\", \"0000-0002-6027-4105\", 
\"0000-0001-5782-8739\", \"0000-0002-5911-6433\", \"0000-0001-8021-0961\", \"0000-0001-6247-4330\", \"0000-0002-4491-1901\", \"0000-0002-7736-940X\", \"0000-0001-6119-7669\", \"0000-0003-3877-5373\", \"0000-0002-8929-7212\", \"0000-0002-2459-2675\", \"0000-0002-8156-5059\", \"0000-0002-7114-5886\", \"0000-0001-5097-977X\", \"0000-0003-3138-197X\", \"0000-0003-4083-2496\", \"0000-0002-0338-3890\", \"0000-0001-9872-5998\", \"0000-0003-1091-0852\", \"0000-0002-8561-142X\", \"0000-0002-6052-6368\", \"0000-0002-2862-2552\", \"0000-0003-1164-9246\", \"0000-0002-0726-7555\", \"0000-0002-7800-1463\", \"0000-0003-4868-5507\", \"0000-0002-2049-316X\", \"0000-0002-1295-2055\", \"0000-0001-9281-8579\", \"0000-0002-7810-3574\", \"0000-0002-4910-1078\", \"0000-0002-7877-8643\", \"0000-0002-4809-8129\", \"0000-0003-0739-261X\", \"0000-0002-4555-6171\", \"0000-0002-4828-8969\", \"0000-0001-7065-7567\", \"0000-0002-1043-5679\", \"0000-0001-8942-822X\", \"0000-0002-6106-9883\", \"0000-0003-4561-1406\", \"0000-0001-8834-2336\", \"0000-0001-9293-2603\", \"0000-0002-2299-2931\", \"0000-0003-0529-408X\", \"0000-0001-6040-4697\", \"0000-0002-1772-6567\", \"0000-0002-7328-7845\", \"0000-0002-3856-3242\", \"0000-0003-2768-672X\", \"0000-0002-2564-7148\", \"0000-0003-3095-4430\", \"0000-0003-4298-9059\", \"0000-0002-1322-595X\", \"0000-0002-6346-5062\", \"0000-0002-7684-1346\", \"0000-0002-0837-7668\", \"0000-0002-8547-0647\", \"0000-0001-9393-1805\", \"0000-0002-9071-6023\", \"0000-0002-5953-3958\", \"0000-0002-1698-5831\", \"0000-0002-1338-9604\", \"0000-0002-9586-0999\", \"0000-0003-1137-9039\", \"0000-0003-3480-0367\", \"0000-0002-1491-2583\", \"0000-0002-7894-3856\", \"0000-0003-4977-7817\", \"0000-0002-3459-7437\", \"0000-0002-3725-5483\", \"0000-0002-5538-8140\", \"0000-0002-6151-3200\", \"0000-0002-7630-5682\", \"0000-0002-7681-0021\", \"0000-0003-4306-9019\", \"0000-0001-5546-8688\", \"0000-0003-4431-5437\", \"0000-0001-5968-1718\", \"0000-0002-4091-7791\", \"0000-0002-4137-7205\", 
\"0000-0003-0572-8757\", \"0000-0002-4864-0758\", \"0000-0003-0756-5509\", \"0000-0003-4096-1634\", \"0000-0002-2413-4439\", \"0000-0002-4679-0998\", \"0000-0002-4645-269X\", \"0000-0002-5922-3001\", \"0000-0001-7256-5228\", \"0000-0002-5105-8515\", \"0000-0003-0098-7936\", \"0000-0003-1866-3411\", \"0000-0001-6111-5547\", \"0000-0003-3525-3940\", \"0000-0002-3644-3503\", \"0000-0002-6028-7627\", \"0000-0003-3175-5572\", \"0000-0002-8287-7222\", \"0000-0003-1595-1389\", \"0000-0001-8377-8843\", \"0000-0002-6968-6931\", \"0000-0001-9338-4573\", \"0000-0001-9504-9306\", \"0000-0003-0254-7141\", \"0000-0001-8275-9325\", \"0000-0002-0702-1256\", \"0000-0003-4922-7292\", \"0000-0002-4815-8953\", \"0000-0003-4222-2069\", \"0000-0002-7586-5477\", \"0000-0001-7531-6702\", \"0000-0002-1764-6137\", \"0000-0002-1936-2859\", \"0000-0002-1110-0694\", \"0000-0002-4580-1528\", \"0000-0003-2354-3537\", \"0000-0002-1112-0425\", \"0000-0003-0226-9700\", \"0000-0002-0750-4004\", \"0000-0003-0798-5503\", \"0000-0002-5724-3092\", \"0000-0002-8037-1148\", \"0000-0002-9075-6957\", \"0000-0003-0273-3948\", \"0000-0001-8105-0541\", \"0000-0001-5240-8946\", \"0000-0002-1255-6722\", \"0000-0002-2248-9076\", \"0000-0002-2482-3031\", \"0000-0001-6610-4716\", \"0000-0002-2679-385X\", \"0000-0001-7351-8749\", \"0000-0003-1870-2011\", \"0000-0002-5780-1605\", \"0000-0001-9548-6661\", \"0000-0001-7956-0921\", \"0000-0001-6077-1083\", \"0000-0002-4312-9694\", \"0000-0003-2726-990X\", \"0000-0002-2310-0990\", \"0000-0003-1218-6052\", \"0000-0003-4137-9291\", \"0000-0002-7546-3240\", \"0000-0001-9632-6459\", \"0000-0002-0317-7042\", \"0000-0002-7785-451X\", \"0000-0001-5329-7467\", \"0000-0002-1872-3667\", \"0000-0003-1180-8753\", \"0000-0003-0011-1520\", \"0000-0002-7218-6057\", \"0000-0003-0225-1324\", \"0000-0001-8055-8992\", \"0000-0002-3019-6791\", \"0000-0003-2859-603X\", \"0000-0001-9119-6026\", \"0000-0002-0587-2233\", \"0000-0002-9511-9754\", \"0000-0003-1517-3898\", \"0000-0003-3993-8541\", 
\"0000-0003-0208-4394\", \"0000-0003-2322-3859\", \"0000-0002-5105-7504\", \"0000-0002-6613-9166\", \"0000-0001-5793-9375\", \"0000-0002-8887-0646\", \"0000-0001-5618-4820\", \"0000-0001-8160-5658\", \"0000-0003-4635-2733\", \"0000-0002-4032-956X\", \"0000-0001-8926-0922\", \"0000-0002-4366-3395\", \"0000-0001-6056-0964\", \"0000-0002-3423-891X\", \"0000-0002-1547-4204\", \"0000-0002-4324-7121\", \"0000-0001-7540-3339\", \"0000-0001-8624-3027\", \"0000-0002-3475-2388\", \"0000-0001-5049-4721\", \"0000-0002-0549-2812\", \"0000-0003-4352-3234\", \"0000-0002-0285-6940\", \"0000-0001-5883-3337\", \"0000-0001-7918-7071\", \"0000-0003-0270-8849\", \"0000-0002-1059-9753\", \"0000-0002-7330-596X\", \"0000-0001-8805-3681\", \"0000-0003-0272-1883\", \"0000-0002-3679-6886\", \"0000-0003-3718-4774\", \"0000-0002-5438-2716\", \"0000-0002-4805-891X\", \"0000-0003-1151-7112\", \"0000-0002-1649-4661\", \"0000-0003-2956-5278\", \"0000-0002-8016-3111\", \"0000-0003-2827-1227\", \"0000-0003-2145-4717\", \"0000-0003-2028-9510\", \"0000-0003-4424-6881\", \"0000-0001-7474-2988\", \"0000-0002-2510-6501\", \"0000-0003-3582-6294\", \"0000-0002-1343-126X\", \"0000-0002-4595-2588\", \"0000-0002-9526-2876\", \"0000-0003-1158-7057\", \"0000-0002-3345-5154\", \"0000-0002-8244-6924\", \"0000-0003-4806-3951\", \"0000-0002-7464-3688\", \"0000-0003-2647-2359\", \"0000-0001-6073-387X\", \"0000-0001-8230-5606\", \"0000-0001-9108-194X\", \"0000-0002-1674-6427\", \"0000-0002-6355-1382\", \"0000-0003-3632-7673\", \"0000-0002-5140-4932\", \"0000-0003-2316-2347\", \"0000-0003-2793-1027\", \"0000-0002-8554-3738\", \"0000-0001-6673-083X\", \"0000-0001-7378-174X\", \"0000-0002-1614-9163\", \"0000-0002-8282-0330\", \"0000-0003-3527-7336\", \"0000-0001-7189-2147\", \"0000-0002-3344-3903\", \"0000-0003-1786-3489\", \"0000-0001-7108-8036\", \"0000-0003-4078-2193\", \"0000-0001-9352-3676\", \"0000-0001-9856-8531\", \"0000-0002-9276-8003\", \"0000-0003-0872-8082\", \"0000-0002-7824-1395\", \"0000-0001-5224-7353\", 
\"0000-0002-5281-2110\", \"0000-0002-3201-6256\", \"0000-0003-3697-1370\", \"0000-0002-7678-0520\", \"0000-0002-3741-371X\", \"0000-0002-8132-2356\", \"0000-0001-8074-1025\", \"0000-0002-7896-7268\", \"0000-0002-5536-6005\", \"0000-0002-5627-5594\", \"0000-0002-2790-8196\", \"0000-0002-4814-6303\", \"0000-0003-3563-8111\", \"0000-0002-3894-8185\", \"0000-0001-6608-759X\", \"0000-0001-6678-4133\", \"0000-0001-6893-5775\", \"0000-0002-9472-9307\", \"0000-0003-2441-0736\", \"0000-0002-8295-024X\", \"0000-0002-4322-6590\", \"0000-0001-9047-2156\", \"0000-0002-9072-6328\", \"0000-0002-1625-1774\", \"0000-0002-2315-0242\", \"0000-0001-6178-4337\", \"0000-0003-1272-8819\", \"0000-0003-1460-8458\", \"0000-0003-4091-5443\", \"0000-0001-6647-3570\", \"0000-0002-0756-4381\", \"0000-0002-7428-5307\", \"0000-0002-7699-0579\", \"0000-0002-3145-000X\", \"0000-0002-9249-230X\", \"0000-0002-0511-4600\", \"0000-0003-1621-0435\", \"0000-0002-6126-7150\", \"0000-0002-8161-6469\", \"0000-0002-9577-7263\", \"0000-0003-4087-3992\", \"0000-0002-1295-2450\", \"0000-0002-5135-5341\", \"0000-0002-7357-3104\", \"0000-0002-2913-3400\", \"0000-0003-0271-5678\", \"0000-0002-3427-5032\", \"0000-0002-2550-0496\", \"0000-0001-6399-6536\", \"0000-0001-7317-8000\", \"0000-0002-0357-9789\", \"0000-0002-8995-3531\", \"0000-0003-4421-9234\", \"0000-0002-6004-465X\", \"0000-0002-8739-4473\", \"0000-0002-5576-1865\", \"0000-0002-6978-9068\", \"0000-0001-9918-496X\", \"0000-0003-2243-8460\", \"0000-0003-2778-4266\", \"0000-0003-4318-6821\", \"0000-0002-7600-7845\", \"0000-0002-8469-1288\", \"0000-0003-4684-4148\", \"0000-0002-7312-128X\", \"0000-0001-5236-2001\", \"0000-0001-8692-9651\", \"0000-0002-8544-0202\", \"0000-0002-8884-3592\", \"0000-0002-1231-9903\", \"0000-0003-0407-7913\", \"0000-0001-7196-6679\", \"0000-0002-1319-1358\", \"0000-0001-7305-3240\", \"0000-0002-9910-2765\", \"0000-0003-4952-1491\", \"0000-0002-7466-2256\", \"0000-0002-9221-8730\", \"0000-0001-9116-7882\", \"0000-0002-8938-6004\", 
\"0000-0002-8684-4321\", \"0000-0003-0818-6607\", \"0000-0001-6798-4401\", \"0000-0001-6500-9047\", \"0000-0002-2693-5627\", \"0000-0002-7256-6340\", \"0000-0002-0194-6362\", \"0000-0002-5125-5092\", \"0000-0002-7197-2355\", \"0000-0002-4535-0958\", \"0000-0001-6476-7321\", \"0000-0003-3345-4757\", \"0000-0003-4179-1155\", \"0000-0002-5536-0156\", \"0000-0002-6206-8444\", \"0000-0001-6570-1594\", \"0000-0002-9711-0067\", \"0000-0001-5321-0392\", \"0000-0003-2488-1263\", \"0000-0003-2781-8198\", \"0000-0002-3739-6681\", \"0000-0003-3577-7963\", \"0000-0003-3286-3073\", \"0000-0001-8200-3333\", \"0000-0002-5813-2765\", \"0000-0002-6206-3563\", \"0000-0003-3660-364X\", \"0000-0002-4134-3566\", \"0000-0001-7768-9799\", \"0000-0001-5895-047X\", \"0000-0002-4722-8681\", \"0000-0001-9603-8908\", \"0000-0002-0772-1586\", \"0000-0001-6797-3964\", \"0000-0002-6834-901X\", \"0000-0002-1554-8306\", \"0000-0002-1879-4262\", \"0000-0002-9640-8136\", \"0000-0002-6926-3752\", \"0000-0002-3655-4713\", \"0000-0002-8724-1020\", \"0000-0002-4601-4569\"]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], 
[0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, 
\"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, 
\"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", 
\"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"title\": {\"text\": \"Activation distribution for bio \\\"more straightforward way to borrow the money you\\\"\"}, \"xaxis\": {\"tickangle\": 45, \"tickfont\": {\"size\": 12}}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('05a1aac8-0f09-4c92-9ab4-3f4da9a3eaa8');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Literal text shared by the copy-pasted biographies under inspection.\n",
"BIO_SNIPPET = 'more straightforward way to borrow the money you'\n",
"# Plotly expects histogram bin sizes on a date axis in milliseconds (or 'M<n>' for months).\n",
"ONE_DAY_MS = 24 * 60 * 60 * 1000\n",
"\n",
"# regex=False: match the snippet as plain text rather than as a pattern.\n",
"# na=False: rows with a missing biography are treated as non-matching, so the mask stays boolean.\n",
"dup_bios_df = df[df.biography.str.contains(BIO_SNIPPET, regex=False, na=False)]\n",
"# .groupby(df.activation_date.dt.month)[['orcid']].count().sort_values('orcid', ascending=False)\n",
"\n",
"set_top_n(50)\n",
"data = [\n",
"    go.Histogram(\n",
"        x=dup_bios_df['activation_date'],\n",
"        y=dup_bios_df['orcid'],\n",
"        histfunc=\"count\"\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Activation distribution for bio \"%s\"' % BIO_SNIPPET,\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"# One histogram bin per activation day.\n",
"fig.update_traces(xbins_size=ONE_DAY_MS)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Duplicated bios longer than 10 words**"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" <tr>\n",
" <th>biography</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>car title loans are a more straightforward way to borrow the money you need, without dealing with the hassles of a traditional bank loan. because they use the equity value of your qualifying vehicle to secure funding, they are a great borrowing option for customers with credit issues, or who need expedited funding. our customers know to turn to our local experts when they need auto title loans. our team is excited to get you your loan today!</th>\n",
" <td>343</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hi, how are you? it is really cool to find an entire community of people interested in the same thing you are.</th>\n",
" <td>229</td>\n",
" </tr>\n",
" <tr>\n",
" <th>the sound and the fury is one of my all-time favorite novels but i have many.</th>\n",
" <td>218</td>\n",
" </tr>\n",
" <tr>\n",
" <th>one of my passions is people watching but i dont get to do it as much as i would like.</th>\n",
" <td>132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>why hello there. i can not believe i didnt know this community existed sooner.</th>\n",
" <td>131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mütter, die sich vor der entbindung für einen rückbildungskurs anmelden, belügen sich selbst.denn das gleicht den vorsätzen zu neujahr: gut gemeint,</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>&lt;p class=p__2&gt;politically, taking on the affordable care act or not taking it on are both dangerous. while many citizens dont comprehend all that the</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>insaat kalip yagi, kalip yag, plywood kalip yagi, ahsap kalip yagi alanlarinda profesyonel ve organik olarak imalat yapan sirketimiz musteri goruslerini son derece onemsemektedir.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>&lt;p class=p__7&gt;since life and medical insurance commissions are front-loaded, agents generally do not get a commission after the 3rd policy renewal.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>ive traveled to several countries and have several more to see. i have a lizard named tinky.</th>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1619 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid\n",
"biography \n",
"car title loans are a more straightforward way ... 343\n",
"hi, how are you? it is really cool to find an e... 229\n",
"the sound and the fury is one of my all-time fa... 218\n",
"one of my passions is people watching but i don... 132\n",
"why hello there. i can not believe i didnt know... 131\n",
"... ...\n",
"mütter, die sich vor der entbindung für einen r... 2\n",
"<p class=p__2>politically, taking on the afford... 2\n",
"insaat kalip yagi, kalip yag, plywood kalip yag... 2\n",
"<p class=p__7>since life and medical insurance ... 2\n",
"ive traveled to several countries and have seve... 2\n",
"\n",
"[1619 rows x 1 columns]"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the duplicated biographies that split into more than 10 space-separated tokens.\n",
"dup_bios.loc[dup_bios.index.str.split(' ').str.len() > 10]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Assign a spam score using an off-the-shelf library (`antispam`)**"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"# bios = df[df.biography.notna()][['orcid', 'biography']]"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"# def score(bio):\n",
"# try:\n",
"# return antispam.score(bio)\n",
"# except: # if len(bio) < 3 the filter doesn't know how to handle that\n",
"# return -1"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [],
"source": [
"# bios['spam_score'] = bios.biography.apply(lambda bio: score(bio))"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"# bios[bios.spam_score == -1] # these are artefacts (no scoring possible)"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"# bios.spam_score.replace(to_replace=-1, value=np.nan, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
"# bios.spam_score.describe()"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
"# bios[bios.spam_score > 0.99]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The off-the-shelf spam score leads nowhere."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Search for offensive words, sexually explicit content, etc.**"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"# bios['profanity_score'] = profanity_check.predict_prob(bios.biography)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# bios[bios.profanity_score > 0.90]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Profanity detection leads nowhere either."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## All-vs-all correlation"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"n_ext_work_source",
"n_valid_education",
"n_valid_employment"
],
"xaxis": "x",
"y": [
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"n_ext_work_source",
"n_valid_education",
"n_valid_employment"
],
"yaxis": "y",
"z": [
[
1,
0.9649829131836175,
0.07899833525811681,
0.07259719921935885,
0.0064613638682561435,
0.030614701011724112,
0.0606246420123506,
0.1240658917743258,
0.03267144560134065,
0.10712349577355784,
0.14475767748321952,
0.10243044622702734,
0.22284375415000315,
0.2240544946507108,
0.22551664240183317,
0.21764209610282456,
0.22363930402583765
],
[
0.9649829131836175,
1,
0.08183974046700901,
0.07518160639621203,
0.0066860590291805974,
0.031712353459948744,
0.06277678931008057,
0.12998640687807267,
0.03226243840878624,
0.11113100230411314,
0.15032740706571793,
0.10624021870034253,
0.23154024673662948,
0.23250920740301625,
0.2327233169990374,
0.22610062830832708,
0.2320611339644608
],
[
0.07899833525811681,
0.08183974046700901,
1,
0.9378726254398347,
0.3126299250047347,
0.35108563893979355,
0.8353346326813307,
0.22240613268720477,
0.03160640765461562,
0.14838588295615024,
0.37171182274445363,
0.1506365739780303,
0.13686193479055792,
0.21343320832924814,
0.4005951950706468,
0.127757181278294,
0.21359436822476655
],
[
0.07259719921935885,
0.07518160639621203,
0.9378726254398347,
1,
0.35605399617723354,
0.3624050122938972,
0.8018196175347003,
0.2178190759174422,
0.028320630207299337,
0.12192573243272048,
0.3525468899622581,
0.12916204989780206,
0.11736450285212531,
0.18470550214116468,
0.3834831573219326,
0.11103974478415263,
0.18991605863836233
],
[
0.0064613638682561435,
0.0066860590291805974,
0.3126299250047347,
0.35605399617723354,
1,
0.0009072282179230706,
0.2420914875526222,
0.01776888092417036,
0.002099309887982074,
0.0064144255162447246,
0.009269883208277058,
0.005433864001670957,
0.008619933999683011,
0.015077339853222701,
0.023203121401780318,
0.008480130351469113,
0.016480069731931876
],
[
0.030614701011724112,
0.031712353459948744,
0.35108563893979355,
0.3624050122938972,
0.0009072282179230706,
1,
0.2570742999530523,
0.0922145270760206,
0.00898053907910667,
0.04988227847309645,
0.08759311081674451,
0.04978801517329604,
0.05364308187508679,
0.09230230828045376,
0.15718240355316795,
0.05003145912334212,
0.09365323185411886
],
[
0.0606246420123506,
0.06277678931008057,
0.8353346326813307,
0.8018196175347003,
0.2420914875526222,
0.2570742999530523,
1,
0.17311687493499073,
0.022792492767692595,
0.12058320737626094,
0.3393283270986452,
0.11149215572697663,
0.09603598655375359,
0.16336464942113507,
0.32867917711898453,
0.08988706485108988,
0.1640726861251499
],
[
0.1240658917743258,
0.12998640687807267,
0.22240613268720477,
0.2178190759174422,
0.01776888092417036,
0.0922145270760206,
0.17311687493499073,
1,
0.03648260346059746,
0.08904343614326711,
0.2970254304693113,
0.11831548180884943,
0.16253031328738454,
0.19508445555508772,
0.4800924470705352,
0.15718163705271387,
0.20674076197089802
],
[
0.03267144560134065,
0.03226243840878624,
0.03160640765461562,
0.028320630207299337,
0.002099309887982074,
0.00898053907910667,
0.022792492767692595,
0.03648260346059746,
1,
0.07143241126539773,
0.06149968615329382,
0.07528446624958736,
0.0730249552881224,
0.07128594621281013,
0.07937133873035572,
0.07063161008579438,
0.06937988840793924
],
[
0.10712349577355784,
0.11113100230411314,
0.14838588295615024,
0.12192573243272048,
0.0064144255162447246,
0.04988227847309645,
0.12058320737626094,
0.08904343614326711,
0.07143241126539773,
1,
0.2085344284826277,
0.3756141239879568,
0.20860391435209405,
0.2439338448964409,
0.2262491070521664,
0.1960308197632984,
0.22734610676272235
],
[
0.14475767748321952,
0.15032740706571793,
0.37171182274445363,
0.3525468899622581,
0.009269883208277058,
0.08759311081674451,
0.3393283270986452,
0.2970254304693113,
0.06149968615329382,
0.2085344284826277,
1,
0.23998646957005906,
0.2584672204668393,
0.3193726129742757,
0.6563247307005879,
0.24730637320826065,
0.32652990120768727
],
[
0.10243044622702734,
0.10624021870034253,
0.1506365739780303,
0.12916204989780206,
0.005433864001670957,
0.04978801517329604,
0.11149215572697663,
0.11831548180884943,
0.07528446624958736,
0.3756141239879568,
0.23998646957005906,
1,
0.28174315114239534,
0.29513823401207673,
0.2602571143552704,
0.2671750926309919,
0.27827299388502297
],
[
0.22284375415000315,
0.23154024673662948,
0.13686193479055792,
0.11736450285212531,
0.008619933999683011,
0.05364308187508679,
0.09603598655375359,
0.16253031328738454,
0.0730249552881224,
0.20860391435209405,
0.2584672204668393,
0.28174315114239534,
1,
0.5935197907835382,
0.34969846406582145,
0.9580202466838004,
0.5673220091683485
],
[
0.2240544946507108,
0.23250920740301625,
0.21343320832924814,
0.18470550214116468,
0.015077339853222701,
0.09230230828045376,
0.16336464942113507,
0.19508445555508772,
0.07128594621281013,
0.2439338448964409,
0.3193726129742757,
0.29513823401207673,
0.5935197907835382,
1,
0.4068774187637994,
0.5657949950743488,
0.9213956311003227
],
[
0.22551664240183317,
0.2327233169990374,
0.4005951950706468,
0.3834831573219326,
0.023203121401780318,
0.15718240355316795,
0.32867917711898453,
0.4800924470705352,
0.07937133873035572,
0.2262491070521664,
0.6563247307005879,
0.2602571143552704,
0.34969846406582145,
0.4068774187637994,
1,
0.33742739872409666,
0.41450829231197867
],
[
0.21764209610282456,
0.22610062830832708,
0.127757181278294,
0.11103974478415263,
0.008480130351469113,
0.05003145912334212,
0.08988706485108988,
0.15718163705271387,
0.07063161008579438,
0.1960308197632984,
0.24730637320826065,
0.2671750926309919,
0.9580202466838004,
0.5657949950743488,
0.33742739872409666,
1,
0.5703564611231601
],
[
0.22363930402583765,
0.2320611339644608,
0.21359436822476655,
0.18991605863836233,
0.016480069731931876,
0.09365323185411886,
0.1640726861251499,
0.20674076197089802,
0.06937988840793924,
0.22734610676272235,
0.32652990120768727,
0.27827299388502297,
0.5673220091683485,
0.9213956311003227,
0.41450829231197867,
0.5703564611231601,
1
]
]
}
],
"layout": {
"coloraxis": {
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
},
"text/html": [
"<div> <div id=\"00b299ff-f445-4fcc-b987-26ae1fab2d7b\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"00b299ff-f445-4fcc-b987-26ae1fab2d7b\")) { Plotly.newPlot( \"00b299ff-f445-4fcc-b987-26ae1fab2d7b\", [{\"coloraxis\": \"coloraxis\", \"hovertemplate\": \"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\", \"name\": \"0\", \"type\": \"heatmap\", \"x\": [\"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"n_ext_work_source\", \"n_valid_education\", \"n_valid_employment\"], \"xaxis\": \"x\", \"y\": [\"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"n_ext_work_source\", \"n_valid_education\", \"n_valid_employment\"], \"yaxis\": \"y\", \"z\": [[1.0, 0.9649829131836175, 0.07899833525811681, 0.07259719921935885, 0.0064613638682561435, 0.030614701011724112, 0.0606246420123506, 0.1240658917743258, 0.03267144560134065, 0.10712349577355784, 0.14475767748321952, 0.10243044622702734, 0.22284375415000315, 0.2240544946507108, 0.22551664240183317, 0.21764209610282456, 0.22363930402583765], [0.9649829131836175, 1.0, 0.08183974046700901, 0.07518160639621203, 0.0066860590291805974, 0.031712353459948744, 0.06277678931008057, 0.12998640687807267, 0.03226243840878624, 0.11113100230411314, 0.15032740706571793, 0.10624021870034253, 0.23154024673662948, 0.23250920740301625, 0.2327233169990374, 0.22610062830832708, 0.2320611339644608], [0.07899833525811681, 0.08183974046700901, 1.0, 0.9378726254398347, 0.3126299250047347, 0.35108563893979355, 0.8353346326813307, 0.22240613268720477, 
0.03160640765461562, 0.14838588295615024, 0.37171182274445363, 0.1506365739780303, 0.13686193479055792, 0.21343320832924814, 0.4005951950706468, 0.127757181278294, 0.21359436822476655], [0.07259719921935885, 0.07518160639621203, 0.9378726254398347, 1.0, 0.35605399617723354, 0.3624050122938972, 0.8018196175347003, 0.2178190759174422, 0.028320630207299337, 0.12192573243272048, 0.3525468899622581, 0.12916204989780206, 0.11736450285212531, 0.18470550214116468, 0.3834831573219326, 0.11103974478415263, 0.18991605863836233], [0.0064613638682561435, 0.0066860590291805974, 0.3126299250047347, 0.35605399617723354, 1.0, 0.0009072282179230706, 0.2420914875526222, 0.01776888092417036, 0.002099309887982074, 0.0064144255162447246, 0.009269883208277058, 0.005433864001670957, 0.008619933999683011, 0.015077339853222701, 0.023203121401780318, 0.008480130351469113, 0.016480069731931876], [0.030614701011724112, 0.031712353459948744, 0.35108563893979355, 0.3624050122938972, 0.0009072282179230706, 1.0, 0.2570742999530523, 0.0922145270760206, 0.00898053907910667, 0.04988227847309645, 0.08759311081674451, 0.04978801517329604, 0.05364308187508679, 0.09230230828045376, 0.15718240355316795, 0.05003145912334212, 0.09365323185411886], [0.0606246420123506, 0.06277678931008057, 0.8353346326813307, 0.8018196175347003, 0.2420914875526222, 0.2570742999530523, 1.0, 0.17311687493499073, 0.022792492767692595, 0.12058320737626094, 0.3393283270986452, 0.11149215572697663, 0.09603598655375359, 0.16336464942113507, 0.32867917711898453, 0.08988706485108988, 0.1640726861251499], [0.1240658917743258, 0.12998640687807267, 0.22240613268720477, 0.2178190759174422, 0.01776888092417036, 0.0922145270760206, 0.17311687493499073, 1.0, 0.03648260346059746, 0.08904343614326711, 0.2970254304693113, 0.11831548180884943, 0.16253031328738454, 0.19508445555508772, 0.4800924470705352, 0.15718163705271387, 0.20674076197089802], [0.03267144560134065, 0.03226243840878624, 0.03160640765461562, 0.028320630207299337, 
0.002099309887982074, 0.00898053907910667, 0.022792492767692595, 0.03648260346059746, 1.0, 0.07143241126539773, 0.06149968615329382, 0.07528446624958736, 0.0730249552881224, 0.07128594621281013, 0.07937133873035572, 0.07063161008579438, 0.06937988840793924], [0.10712349577355784, 0.11113100230411314, 0.14838588295615024, 0.12192573243272048, 0.0064144255162447246, 0.04988227847309645, 0.12058320737626094, 0.08904343614326711, 0.07143241126539773, 1.0, 0.2085344284826277, 0.3756141239879568, 0.20860391435209405, 0.2439338448964409, 0.2262491070521664, 0.1960308197632984, 0.22734610676272235], [0.14475767748321952, 0.15032740706571793, 0.37171182274445363, 0.3525468899622581, 0.009269883208277058, 0.08759311081674451, 0.3393283270986452, 0.2970254304693113, 0.06149968615329382, 0.2085344284826277, 1.0, 0.23998646957005906, 0.2584672204668393, 0.3193726129742757, 0.6563247307005879, 0.24730637320826065, 0.32652990120768727], [0.10243044622702734, 0.10624021870034253, 0.1506365739780303, 0.12916204989780206, 0.005433864001670957, 0.04978801517329604, 0.11149215572697663, 0.11831548180884943, 0.07528446624958736, 0.3756141239879568, 0.23998646957005906, 1.0, 0.28174315114239534, 0.29513823401207673, 0.2602571143552704, 0.2671750926309919, 0.27827299388502297], [0.22284375415000315, 0.23154024673662948, 0.13686193479055792, 0.11736450285212531, 0.008619933999683011, 0.05364308187508679, 0.09603598655375359, 0.16253031328738454, 0.0730249552881224, 0.20860391435209405, 0.2584672204668393, 0.28174315114239534, 1.0, 0.5935197907835382, 0.34969846406582145, 0.9580202466838004, 0.5673220091683485], [0.2240544946507108, 0.23250920740301625, 0.21343320832924814, 0.18470550214116468, 0.015077339853222701, 0.09230230828045376, 0.16336464942113507, 0.19508445555508772, 0.07128594621281013, 0.2439338448964409, 0.3193726129742757, 0.29513823401207673, 0.5935197907835382, 1.0, 0.4068774187637994, 0.5657949950743488, 0.9213956311003227], [0.22551664240183317, 0.2327233169990374, 
0.4005951950706468, 0.3834831573219326, 0.023203121401780318, 0.15718240355316795, 0.32867917711898453, 0.4800924470705352, 0.07937133873035572, 0.2262491070521664, 0.6563247307005879, 0.2602571143552704, 0.34969846406582145, 0.4068774187637994, 1.0, 0.33742739872409666, 0.41450829231197867], [0.21764209610282456, 0.22610062830832708, 0.127757181278294, 0.11103974478415263, 0.008480130351469113, 0.05003145912334212, 0.08988706485108988, 0.15718163705271387, 0.07063161008579438, 0.1960308197632984, 0.24730637320826065, 0.2671750926309919, 0.9580202466838004, 0.5657949950743488, 0.33742739872409666, 1.0, 0.5703564611231601], [0.22363930402583765, 0.2320611339644608, 0.21359436822476655, 0.18991605863836233, 0.016480069731931876, 0.09365323185411886, 0.1640726861251499, 0.20674076197089802, 0.06937988840793924, 0.22734610676272235, 0.32652990120768727, 0.27827299388502297, 0.5673220091683485, 0.9213956311003227, 0.41450829231197867, 0.5703564611231601, 1.0]]}], {\"coloraxis\": {\"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"margin\": {\"t\": 60}, \"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, 
\"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], 
[0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": 
[[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": \"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": 
true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"xaxis\": {\"anchor\": \"y\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0], \"scaleanchor\": \"y\"}, \"yaxis\": {\"anchor\": \"x\", \"autorange\": \"reversed\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0]}}, {\"responsive\": true} 
).then(function(){\n",
" \n",
"var gd = document.getElementById('00b299ff-f445-4fcc-b987-26ae1fab2d7b');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Correlation heatmap over the boolean/numeric columns of df;\n",
"# missing values are replaced with -1 before the correlations are computed.\n",
"fig = px.imshow(\n",
"    df.select_dtypes(include=['bool', 'number'])\n",
"    .fillna(-1)\n",
"    .corr()\n",
")\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"n_ext_work_source",
"n_valid_education",
"n_valid_employment"
],
"xaxis": "x",
"y": [
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"n_ext_work_source",
"n_valid_education",
"n_valid_employment"
],
"yaxis": "y",
"z": [
[
1,
0.9752519595075795,
0.09214895262049282,
0.0879798648550178,
0.007961003694278242,
0.03915260384340702,
0.07001890457066572,
null,
0.032103975497214034,
0.10336686548032378,
0.17703441827034647,
0.09979570243186875,
0.20774401162922493,
0.21084239631411816,
0.2794253838221325,
0.20026368730516023,
0.20839469730166119
],
[
0.9752519595075795,
1,
0.09431761242203819,
0.0900004773636232,
0.008106110602443744,
0.0400496723953056,
0.07165587098532082,
null,
0.031351331970120695,
0.1057825589480781,
0.18090502876600972,
0.10202726190110512,
0.21260169168082205,
0.2157742765417939,
0.28374005567757044,
0.20494936344834785,
0.21327740577748075
],
[
0.09214895262049282,
0.09431761242203819,
1,
0.9442390478594899,
0.31282355837318465,
0.3401860050239849,
0.8368930144083708,
null,
0.027114018965021653,
0.18960427446693856,
0.3701087201755157,
0.15361031273509804,
0.12788627071415765,
0.2198910262585617,
0.3751032536226483,
0.11578712066593932,
0.209242892554831
],
[
0.0879798648550178,
0.0900004773636232,
0.9442390478594899,
1,
0.35310861748965644,
0.34565981353619407,
0.7975802646734866,
null,
0.024895371173298486,
0.1595012184956836,
0.3538795299117989,
0.135983848799029,
0.1128746129637302,
0.19696501794506754,
0.36561758816157264,
0.1034718692729805,
0.19174387277711163
],
[
0.007961003694278242,
0.008106110602443744,
0.31282355837318465,
0.35310861748965644,
1,
-0.0011670424954112538,
0.23678033932846554,
null,
0.00211588954934983,
0.006214909337050867,
0.002381747483995223,
0.003269225741378764,
0.006500333994954841,
0.01471161112308708,
0.015370101748810604,
0.006563821087291732,
0.016026029845563342
],
[
0.03915260384340702,
0.0400496723953056,
0.3401860050239849,
0.34565981353619407,
-0.0011670424954112538,
1,
0.26153038636966397,
null,
0.007101020905289937,
0.0706709846169798,
0.0752778645740394,
0.053848881014517465,
0.05709771456889447,
0.11004375574208852,
0.15537939152071614,
0.05119882882780883,
0.10510621016856672
],
[
0.07001890457066572,
0.07165587098532082,
0.8368930144083708,
0.7975802646734866,
0.23678033932846554,
0.26153038636966397,
1,
null,
0.018655203426401264,
0.1586281614132468,
0.3420993209878049,
0.11384257986969694,
0.08705808615401064,
0.17010426408809934,
0.3094217227338286,
0.07827740920727895,
0.16071127514146832
],
[
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
[
0.032103975497214034,
0.031351331970120695,
0.027114018965021653,
0.024895371173298486,
0.00211588954934983,
0.007101020905289937,
0.018655203426401264,
null,
1,
0.0909747866140844,
0.059014690682252895,
0.07944382351916351,
0.07321093131359531,
0.07382471873049441,
0.07579662164259422,
0.06968465202615097,
0.06920049140404959
],
[
0.10336686548032378,
0.1057825589480781,
0.18960427446693856,
0.1595012184956836,
0.006214909337050867,
0.0706709846169798,
0.1586281614132468,
null,
0.0909747866140844,
1,
0.2860124991441231,
0.4211551575366347,
0.2711332131723409,
0.3238915131948033,
0.2936726355013525,
0.25090179075226243,
0.29834737277315043
],
[
0.17703441827034647,
0.18090502876600972,
0.3701087201755157,
0.3538795299117989,
0.002381747483995223,
0.0752778645740394,
0.3420993209878049,
null,
0.059014690682252895,
0.2860124991441231,
1,
0.26309244439778146,
0.2715744156799263,
0.3362043821086919,
0.6319869414234904,
0.25446047854497,
0.32602473926417674
],
[
0.09979570243186875,
0.10202726190110512,
0.15361031273509804,
0.135983848799029,
0.003269225741378764,
0.053848881014517465,
0.11384257986969694,
null,
0.07944382351916351,
0.4211551575366347,
0.26309244439778146,
1,
0.29286664709583443,
0.31291112451448255,
0.26474267153169084,
0.27215977673427116,
0.28974686127811694
],
[
0.20774401162922493,
0.21260169168082205,
0.12788627071415765,
0.1128746129637302,
0.006500333994954841,
0.05709771456889447,
0.08705808615401064,
null,
0.07321093131359531,
0.2711332131723409,
0.2715744156799263,
0.29286664709583443,
1,
0.5957235494782747,
0.3483340934729055,
0.9529312090041255,
0.5718356944892686
],
[
0.21084239631411816,
0.2157742765417939,
0.2198910262585617,
0.19696501794506754,
0.01471161112308708,
0.11004375574208852,
0.17010426408809934,
null,
0.07382471873049441,
0.3238915131948033,
0.3362043821086919,
0.31291112451448255,
0.5957235494782747,
1,
0.3981500704710164,
0.5634978540193307,
0.9281296871523289
],
[
0.2794253838221325,
0.28374005567757044,
0.3751032536226483,
0.36561758816157264,
0.015370101748810604,
0.15537939152071614,
0.3094217227338286,
null,
0.07579662164259422,
0.2936726355013525,
0.6319869414234904,
0.26474267153169084,
0.3483340934729055,
0.3981500704710164,
1,
0.3302303910036832,
0.38651045476180806
],
[
0.20026368730516023,
0.20494936344834785,
0.11578712066593932,
0.1034718692729805,
0.006563821087291732,
0.05119882882780883,
0.07827740920727895,
null,
0.06968465202615097,
0.25090179075226243,
0.25446047854497,
0.27215977673427116,
0.9529312090041255,
0.5634978540193307,
0.3302303910036832,
1,
0.5748499965968074
],
[
0.20839469730166119,
0.21327740577748075,
0.209242892554831,
0.19174387277711163,
0.016026029845563342,
0.10510621016856672,
0.16071127514146832,
null,
0.06920049140404959,
0.29834737277315043,
0.32602473926417674,
0.28974686127811694,
0.5718356944892686,
0.9281296871523289,
0.38651045476180806,
0.5748499965968074,
1
]
]
}
],
"layout": {
"coloraxis": {
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
},
"text/html": [
"<div> <div id=\"4a0d9f50-4b16-43b0-9533-8bf000ea3f0f\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"4a0d9f50-4b16-43b0-9533-8bf000ea3f0f\")) { Plotly.newPlot( \"4a0d9f50-4b16-43b0-9533-8bf000ea3f0f\", [{\"coloraxis\": \"coloraxis\", \"hovertemplate\": \"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\", \"name\": \"0\", \"type\": \"heatmap\", \"x\": [\"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"n_ext_work_source\", \"n_valid_education\", \"n_valid_employment\"], \"xaxis\": \"x\", \"y\": [\"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"n_ext_work_source\", \"n_valid_education\", \"n_valid_employment\"], \"yaxis\": \"y\", \"z\": [[1.0, 0.9752519595075795, 0.09214895262049282, 0.0879798648550178, 0.007961003694278242, 0.03915260384340702, 0.07001890457066572, null, 0.032103975497214034, 0.10336686548032378, 0.17703441827034647, 0.09979570243186875, 0.20774401162922493, 0.21084239631411816, 0.2794253838221325, 0.20026368730516023, 0.20839469730166119], [0.9752519595075795, 1.0, 0.09431761242203819, 0.0900004773636232, 0.008106110602443744, 0.0400496723953056, 0.07165587098532082, null, 0.031351331970120695, 0.1057825589480781, 0.18090502876600972, 0.10202726190110512, 0.21260169168082205, 0.2157742765417939, 0.28374005567757044, 0.20494936344834785, 0.21327740577748075], [0.09214895262049282, 0.09431761242203819, 1.0, 0.9442390478594899, 0.31282355837318465, 0.3401860050239849, 0.8368930144083708, null, 0.027114018965021653, 0.18960427446693856, 0.3701087201755157, 
0.15361031273509804, 0.12788627071415765, 0.2198910262585617, 0.3751032536226483, 0.11578712066593932, 0.209242892554831], [0.0879798648550178, 0.0900004773636232, 0.9442390478594899, 1.0, 0.35310861748965644, 0.34565981353619407, 0.7975802646734866, null, 0.024895371173298486, 0.1595012184956836, 0.3538795299117989, 0.135983848799029, 0.1128746129637302, 0.19696501794506754, 0.36561758816157264, 0.1034718692729805, 0.19174387277711163], [0.007961003694278242, 0.008106110602443744, 0.31282355837318465, 0.35310861748965644, 1.0, -0.0011670424954112538, 0.23678033932846554, null, 0.00211588954934983, 0.006214909337050867, 0.002381747483995223, 0.003269225741378764, 0.006500333994954841, 0.01471161112308708, 0.015370101748810604, 0.006563821087291732, 0.016026029845563342], [0.03915260384340702, 0.0400496723953056, 0.3401860050239849, 0.34565981353619407, -0.0011670424954112538, 1.0, 0.26153038636966397, null, 0.007101020905289937, 0.0706709846169798, 0.0752778645740394, 0.053848881014517465, 0.05709771456889447, 0.11004375574208852, 0.15537939152071614, 0.05119882882780883, 0.10510621016856672], [0.07001890457066572, 0.07165587098532082, 0.8368930144083708, 0.7975802646734866, 0.23678033932846554, 0.26153038636966397, 1.0, null, 0.018655203426401264, 0.1586281614132468, 0.3420993209878049, 0.11384257986969694, 0.08705808615401064, 0.17010426408809934, 0.3094217227338286, 0.07827740920727895, 0.16071127514146832], [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [0.032103975497214034, 0.031351331970120695, 0.027114018965021653, 0.024895371173298486, 0.00211588954934983, 0.007101020905289937, 0.018655203426401264, null, 1.0, 0.0909747866140844, 0.059014690682252895, 0.07944382351916351, 0.07321093131359531, 0.07382471873049441, 0.07579662164259422, 0.06968465202615097, 0.06920049140404959], [0.10336686548032378, 0.1057825589480781, 0.18960427446693856, 0.1595012184956836, 0.006214909337050867, 0.0706709846169798, 
0.1586281614132468, null, 0.0909747866140844, 1.0, 0.2860124991441231, 0.4211551575366347, 0.2711332131723409, 0.3238915131948033, 0.2936726355013525, 0.25090179075226243, 0.29834737277315043], [0.17703441827034647, 0.18090502876600972, 0.3701087201755157, 0.3538795299117989, 0.002381747483995223, 0.0752778645740394, 0.3420993209878049, null, 0.059014690682252895, 0.2860124991441231, 1.0, 0.26309244439778146, 0.2715744156799263, 0.3362043821086919, 0.6319869414234904, 0.25446047854497, 0.32602473926417674], [0.09979570243186875, 0.10202726190110512, 0.15361031273509804, 0.135983848799029, 0.003269225741378764, 0.053848881014517465, 0.11384257986969694, null, 0.07944382351916351, 0.4211551575366347, 0.26309244439778146, 1.0, 0.29286664709583443, 0.31291112451448255, 0.26474267153169084, 0.27215977673427116, 0.28974686127811694], [0.20774401162922493, 0.21260169168082205, 0.12788627071415765, 0.1128746129637302, 0.006500333994954841, 0.05709771456889447, 0.08705808615401064, null, 0.07321093131359531, 0.2711332131723409, 0.2715744156799263, 0.29286664709583443, 1.0, 0.5957235494782747, 0.3483340934729055, 0.9529312090041255, 0.5718356944892686], [0.21084239631411816, 0.2157742765417939, 0.2198910262585617, 0.19696501794506754, 0.01471161112308708, 0.11004375574208852, 0.17010426408809934, null, 0.07382471873049441, 0.3238915131948033, 0.3362043821086919, 0.31291112451448255, 0.5957235494782747, 1.0, 0.3981500704710164, 0.5634978540193307, 0.9281296871523289], [0.2794253838221325, 0.28374005567757044, 0.3751032536226483, 0.36561758816157264, 0.015370101748810604, 0.15537939152071614, 0.3094217227338286, null, 0.07579662164259422, 0.2936726355013525, 0.6319869414234904, 0.26474267153169084, 0.3483340934729055, 0.3981500704710164, 1.0, 0.3302303910036832, 0.38651045476180806], [0.20026368730516023, 0.20494936344834785, 0.11578712066593932, 0.1034718692729805, 0.006563821087291732, 0.05119882882780883, 0.07827740920727895, null, 0.06968465202615097, 0.25090179075226243, 
0.25446047854497, 0.27215977673427116, 0.9529312090041255, 0.5634978540193307, 0.3302303910036832, 1.0, 0.5748499965968074], [0.20839469730166119, 0.21327740577748075, 0.209242892554831, 0.19174387277711163, 0.016026029845563342, 0.10510621016856672, 0.16071127514146832, null, 0.06920049140404959, 0.29834737277315043, 0.32602473926417674, 0.28974686127811694, 0.5718356944892686, 0.9281296871523289, 0.38651045476180806, 0.5748499965968074, 1.0]]}], {\"coloraxis\": {\"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"margin\": {\"t\": 60}, \"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, 
\"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": 
\"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"mesh3d\"}], \"parcoords\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"parcoords\"}], \"pie\": [{\"automargin\": true, \"type\": \"pie\"}], \"scatter\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter\"}], \"scatter3d\": [{\"line\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatter3d\"}], \"scattercarpet\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattercarpet\"}], \"scattergeo\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergeo\"}], \"scattergl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattergl\"}], \"scattermapbox\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scattermapbox\"}], \"scatterpolar\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolar\"}], \"scatterpolargl\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterpolargl\"}], \"scatterternary\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"scatterternary\"}], \"surface\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"surface\"}], \"table\": [{\"cells\": {\"fill\": {\"color\": \"#EBF0F8\"}, \"line\": {\"color\": \"white\"}}, \"header\": {\"fill\": {\"color\": \"#C8D4E3\"}, \"line\": {\"color\": 
\"white\"}}, \"type\": \"table\"}]}, \"layout\": {\"annotationdefaults\": {\"arrowcolor\": \"#2a3f5f\", \"arrowhead\": 0, \"arrowwidth\": 1}, \"autotypenumbers\": \"strict\", \"coloraxis\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"colorscale\": {\"diverging\": [[0, \"#8e0152\"], [0.1, \"#c51b7d\"], [0.2, \"#de77ae\"], [0.3, \"#f1b6da\"], [0.4, \"#fde0ef\"], [0.5, \"#f7f7f7\"], [0.6, \"#e6f5d0\"], [0.7, \"#b8e186\"], [0.8, \"#7fbc41\"], [0.9, \"#4d9221\"], [1, \"#276419\"]], \"sequential\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"sequentialminus\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]]}, \"colorway\": [\"#636efa\", \"#EF553B\", \"#00cc96\", \"#ab63fa\", \"#FFA15A\", \"#19d3f3\", \"#FF6692\", \"#B6E880\", \"#FF97FF\", \"#FECB52\"], \"font\": {\"color\": \"#2a3f5f\"}, \"geo\": {\"bgcolor\": \"white\", \"lakecolor\": \"white\", \"landcolor\": \"#E5ECF6\", \"showlakes\": true, \"showland\": true, \"subunitcolor\": \"white\"}, \"hoverlabel\": {\"align\": \"left\"}, \"hovermode\": \"closest\", \"mapbox\": {\"style\": \"light\"}, \"paper_bgcolor\": \"white\", \"plot_bgcolor\": \"#E5ECF6\", \"polar\": {\"angularaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"radialaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"scene\": {\"xaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, 
\"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"yaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}, \"zaxis\": {\"backgroundcolor\": \"#E5ECF6\", \"gridcolor\": \"white\", \"gridwidth\": 2, \"linecolor\": \"white\", \"showbackground\": true, \"ticks\": \"\", \"zerolinecolor\": \"white\"}}, \"shapedefaults\": {\"line\": {\"color\": \"#2a3f5f\"}}, \"ternary\": {\"aaxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"baxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}, \"bgcolor\": \"#E5ECF6\", \"caxis\": {\"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\"}}, \"title\": {\"x\": 0.05}, \"xaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}, \"yaxis\": {\"automargin\": true, \"gridcolor\": \"white\", \"linecolor\": \"white\", \"ticks\": \"\", \"title\": {\"standoff\": 15}, \"zerolinecolor\": \"white\", \"zerolinewidth\": 2}}}, \"xaxis\": {\"anchor\": \"y\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0], \"scaleanchor\": \"y\"}, \"yaxis\": {\"anchor\": \"x\", \"autorange\": \"reversed\", \"constrain\": \"domain\", \"domain\": [0.0, 1.0]}}, {\"responsive\": true} ).then(function(){\n",
" \n",
"var gd = document.getElementById('4a0d9f50-4b16-43b0-9533-8bf000ea3f0f');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Correlation heatmap of the boolean/numeric features, restricted to rows where label is True.\n",
"# `label` is constant inside this subset, so its correlation with every other column is\n",
"# undefined (it rendered as an all-NaN row/column); drop it before computing the matrix.\n",
"# NOTE(review): fillna(-1) makes missing values count as a real -1 in the correlation -- confirm this is intended.\n",
"positive_features = (\n",
"    df[df.label == True]\n",
"    .select_dtypes(include=['bool', 'number'])\n",
"    .drop(columns=['label'])\n",
"    .fillna(-1)\n",
")\n",
"fig = px.imshow(positive_features.corr())\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"# df[['verified_email', \n",
"# 'verified_primary_email', \n",
"# 'n_works', \n",
"# 'n_doi',\n",
"# 'n_arxiv', \n",
"# 'n_pmc', \n",
"# 'n_other_pids', \n",
"# 'n_emails', \n",
"# 'n_urls', \n",
"# 'n_ids', \n",
"# 'n_keywords', \n",
"# 'n_employment', \n",
"# 'n_education', \n",
"# 'label']].to_pickle('../data/processed/features.pkl')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 10989649 entries, 0 to 10989648\n",
"Data columns (total 35 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 orcid object \n",
" 1 verified_email bool \n",
" 2 verified_primary_email bool \n",
" 3 given_names string \n",
" 4 family_name string \n",
" 5 biography string \n",
" 6 other_names object \n",
" 7 primary_email string \n",
" 8 keywords object \n",
" 9 external_ids object \n",
" 10 education object \n",
" 11 employment object \n",
" 12 n_works Int16 \n",
" 13 works_source object \n",
" 14 activation_date datetime64[ns, UTC]\n",
" 15 last_update_date datetime64[ns, UTC]\n",
" 16 n_doi Int16 \n",
" 17 n_arxiv Int16 \n",
" 18 n_pmc Int16 \n",
" 19 n_other_pids Int16 \n",
" 20 label bool \n",
" 21 primary_email_domain object \n",
" 22 other_email_domains object \n",
" 23 url_domains object \n",
" 24 n_emails Int16 \n",
" 25 n_urls Int16 \n",
" 26 n_ids Int16 \n",
" 27 n_keywords Int16 \n",
" 28 n_education Int16 \n",
" 29 n_employment Int16 \n",
" 30 ext_works_source object \n",
" 31 n_ext_work_source Int16 \n",
" 32 authoritative object \n",
" 33 n_valid_education float64 \n",
" 34 n_valid_employment float64 \n",
"dtypes: Int16(12), bool(3), datetime64[ns, UTC](2), float64(2), object(12), string(4)\n",
"memory usage: 2.1+ GB\n"
]
}
],
"source": [
"df.info()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}