fake-orcid-analysis/notebooks/01-Exploration.ipynb

16463 lines
588 KiB
Plaintext
Raw Normal View History

2021-03-18 17:43:00 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-19 12:19:45 +01:00
"# Exploratory analysis"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO:\n",
"- Understanding the reason for fake profiles can bring insight on how to catch them (could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n",
"- Catalogue the case types (e.g., author publishing with an empty ORCID, author publishing but not in OpenAIRE, etc.)\n",
"- Is the temporal dimension of any use?\n",
"- Can we access private info thanks to the OpenAIRE-ORCID agreement?\n"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 1,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2021-03-24 13:33:01 +01:00
"import glob\n",
"\n",
2021-03-18 17:43:00 +01:00
"import pandas as pd\n",
"import ast\n",
"import tldextract\n",
2021-03-25 15:20:06 +01:00
"import numpy as np\n",
"\n",
"import antispam\n",
2021-03-18 17:43:00 +01:00
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
2021-03-22 19:08:20 +01:00
"import plotly.express as px\n",
2021-03-18 17:43:00 +01:00
"\n",
"init_notebook_mode(connected=True)\n",
2021-03-23 09:35:35 +01:00
"TOP_N = 0\n",
"TOP_RANGE = [0, 0]\n",
2021-03-25 15:20:06 +01:00
"\n",
2021-03-23 09:35:35 +01:00
"def set_top_n(n):\n",
" global TOP_N, TOP_RANGE\n",
" TOP_N = n\n",
2021-03-25 15:20:06 +01:00
" TOP_RANGE = [-.5, n - 1 + .5]\n",
" \n",
"pd.set_option('display.max_columns', None)"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable solid ORCID iDs for exploratory purposes:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"AM = '0000-0002-5193-7851'\n",
"PP = '0000-0002-8588-4196'\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"Notable anomalies:"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 3,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
"JOURNAL = '0000-0003-1815-5732'\n",
2021-03-23 12:13:04 +01:00
"NOINFO = '0000-0001-5009-2052'\n",
"VALID_NO_OA = '0000-0002-5154-6404' # True profile, but not in OpenAIRE\n",
2021-03-22 19:08:20 +01:00
"# todo: find group-shared ORCiD, if possible"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"Notable fake ORCID iDs:"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"SCAFFOLD = '0000-0001-5004-7761'\n",
"WHATSAPP = '0000-0001-6997-9470'\n",
"PENIS = '0000-0002-3399-7287'\n",
"BITCOIN = '0000-0002-7518-6845'\n",
"FITNESS_CHINA = '0000-0002-1234-835X' # URL record + employment\n",
"CANNABIS = '0000-0002-9025-8632' # URL > 70 + works (REMOVED)\n",
"PLUMBER = '0000-0002-1700-8311' # URL > 10 + works "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the dataset"
]
},
{
"cell_type": "code",
"execution_count": 5,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-18 17:43:00 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
2021-03-25 15:20:06 +01:00
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
2021-03-18 17:43:00 +01:00
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-25 15:20:06 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-25 15:20:06 +01:00
" <th>0000-0001-6097-3953</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
2021-03-25 15:20:06 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>2018-03-02t09:29:16.528z</td>\n",
" <td>2018-03-02t09:43:07.551z</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-25 15:20:06 +01:00
" <th>0000-0001-6112-5550</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>[v.i. yurtaev; v. yurtaev]</td>\n",
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>[[professor, peoples friendship university of ...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>2018-04-03t07:50:23.358z</td>\n",
" <td>2020-03-18t09:42:44.753z</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-25 15:20:06 +01:00
" <th>0000-0001-6152-2695</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>2019-12-11t15:31:56.388z</td>\n",
" <td>2020-01-28t15:34:17.309z</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-25 15:20:06 +01:00
" <th>0000-0001-6220-5683</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, new york university abu ...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>2015-08-18t12:36:45.307z</td>\n",
" <td>2020-09-23t13:37:54.180z</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-25 15:20:06 +01:00
" <th>0000-0001-7071-8294</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcher (academic), universidad de zarago...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>NaN</td>\n",
" <td>2014-03-10t13:22:01.966z</td>\n",
" <td>2016-06-14t22:17:54.470z</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
2021-03-25 15:20:06 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-03-25 15:20:06 +01:00
" orcid verified_email verified_primary_email \\\n",
"0000-0001-6097-3953 1 0 0 \n",
"0000-0001-6112-5550 1 1 1 \n",
"0000-0001-6152-2695 1 1 1 \n",
"0000-0001-6220-5683 1 1 1 \n",
"0000-0001-7071-8294 1 1 1 \n",
"\n",
" given_names family_name biography \\\n",
"0000-0001-6097-3953 NaN NaN NaN \n",
"0000-0001-6112-5550 NaN NaN NaN \n",
"0000-0001-6152-2695 NaN NaN NaN \n",
"0000-0001-6220-5683 NaN NaN NaN \n",
"0000-0001-7071-8294 NaN NaN NaN \n",
"\n",
" other_names primary_email keywords \\\n",
"0000-0001-6097-3953 NaN NaN NaN \n",
"0000-0001-6112-5550 [v.i. yurtaev; v. yurtaev] NaN NaN \n",
"0000-0001-6152-2695 NaN NaN NaN \n",
"0000-0001-6220-5683 NaN NaN NaN \n",
"0000-0001-7071-8294 NaN NaN NaN \n",
"\n",
" external_ids education \\\n",
"0000-0001-6097-3953 NaN NaN \n",
"0000-0001-6112-5550 NaN NaN \n",
"0000-0001-6152-2695 NaN NaN \n",
"0000-0001-6220-5683 NaN NaN \n",
"0000-0001-7071-8294 NaN NaN \n",
"\n",
" employment \\\n",
"0000-0001-6097-3953 NaN \n",
"0000-0001-6112-5550 [[professor, peoples friendship university of ... \n",
"0000-0001-6152-2695 NaN \n",
"0000-0001-6220-5683 [[research scientist, new york university abu ... \n",
"0000-0001-7071-8294 [[researcher (academic), universidad de zarago... \n",
"\n",
" n_works works_source activation_date \\\n",
"0000-0001-6097-3953 0 NaN 2018-03-02t09:29:16.528z \n",
"0000-0001-6112-5550 0 NaN 2018-04-03t07:50:23.358z \n",
"0000-0001-6152-2695 0 NaN 2019-12-11t15:31:56.388z \n",
"0000-0001-6220-5683 0 NaN 2015-08-18t12:36:45.307z \n",
"0000-0001-7071-8294 0 NaN 2014-03-10t13:22:01.966z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc \\\n",
"0000-0001-6097-3953 2018-03-02t09:43:07.551z 0 0 0 \n",
"0000-0001-6112-5550 2020-03-18t09:42:44.753z 0 0 0 \n",
"0000-0001-6152-2695 2020-01-28t15:34:17.309z 0 0 0 \n",
"0000-0001-6220-5683 2020-09-23t13:37:54.180z 0 0 0 \n",
"0000-0001-7071-8294 2016-06-14t22:17:54.470z 0 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"0000-0001-6097-3953 0 0 NaN \n",
"0000-0001-6112-5550 0 0 NaN \n",
"0000-0001-6152-2695 0 0 NaN \n",
"0000-0001-6220-5683 0 0 NaN \n",
"0000-0001-7071-8294 0 0 NaN \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids \\\n",
"0000-0001-6097-3953 NaN NaN NaN NaN NaN \n",
"0000-0001-6112-5550 NaN NaN NaN NaN NaN \n",
"0000-0001-6152-2695 NaN NaN NaN NaN NaN \n",
"0000-0001-6220-5683 NaN NaN NaN NaN NaN \n",
"0000-0001-7071-8294 NaN NaN NaN NaN NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"0000-0001-6097-3953 NaN NaN NaN \n",
"0000-0001-6112-5550 NaN NaN 1.0 \n",
"0000-0001-6152-2695 NaN NaN NaN \n",
"0000-0001-6220-5683 NaN NaN 1.0 \n",
"0000-0001-7071-8294 NaN NaN 2.0 "
2021-03-18 17:43:00 +01:00
]
},
"execution_count": 5,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-24 13:33:01 +01:00
"parts = glob.glob('../data/processed/dataset.pkl.*')\n",
2021-03-25 15:20:06 +01:00
"\n",
"# Fail fast with a clear message: pd.concat on an empty sequence only reports 'No objects to concatenate'.\n",
"if not parts:\n",
"    raise FileNotFoundError('No dataset parts match ../data/processed/dataset.pkl.*')\n",
"# NOTE(review): unpickling can execute arbitrary code; only load parts produced by this project's own pipeline.\n",
"df = pd.concat((pd.read_pickle(part) for part in sorted(parts)))\n",
2021-03-23 19:03:37 +01:00
"df.head(5)"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 12:13:04 +01:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"Inspection of notable profiles"
2021-03-23 12:13:04 +01:00
]
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "code",
"execution_count": 6,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
2021-03-25 15:20:06 +01:00
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
2021-03-18 17:43:00 +01:00
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-25 15:20:06 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-03-25 15:20:06 +01:00
"Empty DataFrame\n",
"Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment]\n",
"Index: []"
2021-03-18 17:43:00 +01:00
]
},
"execution_count": 6,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"# The ORCID iDs are the frame's index; the 'orcid' column holds a constant 1, so filter on the index.\n",
"df[df.index == AM]"
2021-03-23 12:13:04 +01:00
]
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "code",
"execution_count": 7,
2021-03-23 12:13:04 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
2021-03-23 19:03:37 +01:00
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
2021-03-25 15:20:06 +01:00
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
2021-03-23 19:03:37 +01:00
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-25 15:20:06 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
2021-03-23 12:13:04 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-03-25 15:20:06 +01:00
"Empty DataFrame\n",
"Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment]\n",
"Index: []"
2021-03-23 19:03:37 +01:00
]
},
"execution_count": 7,
2021-03-23 19:03:37 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The ORCID iDs are the frame's index; the 'orcid' column holds a constant 1, so filter on the index.\n",
"df[df.index == WHATSAPP]"
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 8,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"orcid 10916574\n",
"verified_email 10916574\n",
"verified_primary_email 10916574\n",
"given_names 10886150\n",
"family_name 10601571\n",
"biography 348649\n",
"other_names 551482\n",
"primary_email 123851\n",
"keywords 646400\n",
"external_ids 1301959\n",
"education 2430233\n",
"employment 2665092\n",
"n_works 10916574\n",
"works_source 2721431\n",
"activation_date 10916574\n",
"last_update_date 10916574\n",
"n_doi 10916574\n",
"n_arxiv 10916574\n",
"n_pmc 10916574\n",
"n_other_pids 10916574\n",
"label 10916574\n",
"primary_email_domain 123851\n",
"other_email_domains 48306\n",
"url_domains 707687\n",
"n_emails 48306\n",
"n_urls 707687\n",
"n_ids 1301959\n",
"n_keywords 646400\n",
"n_education 2430233\n",
"n_employment 2665092\n",
"dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 12:13:04 +01:00
"source": [
2021-03-24 13:33:01 +01:00
"df.count() #10916574"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 9,
2021-03-23 12:13:04 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"count 10916574.0\n",
"mean 1.0\n",
"std 0.0\n",
"min 1.0\n",
"25% 1.0\n",
"50% 1.0\n",
"75% 1.0\n",
"max 1.0\n",
"Name: orcid, dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 12:13:04 +01:00
"source": [
2021-03-23 19:03:37 +01:00
"df['orcid'].describe()"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"## Primary email"
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 10,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"count 123851\n",
"unique 123848\n",
"top maykin@owasp.org\n",
"freq 2\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 19:03:37 +01:00
"source": [
"df['primary_email'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Duplicate emails"
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 11,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"0000-0003-0033-0918 opercin@erbakan.edu.tr\n",
"0000-0002-8774-0030 patrick.davey@monash.edu\n",
"0000-0001-9855-1676 maykin@owasp.org\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 19:03:37 +01:00
"source": [
"# Non-null primary emails that repeat an earlier row; one mask on df's own index avoids aligning a\n",
"# full-length boolean Series onto the shorter dropna()'d series.\n",
"df.loc[df['primary_email'].notna() & df['primary_email'].duplicated(), 'primary_email']"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 12,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-0836-2271</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-15t04:43:55.709z</td>\n",
" <td>2020-09-15t05:17:28.509z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>owasp.org</td>\n",
" <td>[dga.or.th]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-9855-1676</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-23t17:51:51.925z</td>\n",
" <td>2021-01-01t15:00:52.053z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>owasp.org</td>\n",
" <td>[dga.or.th, ieee.org]</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-0836-2271 1 1 1 \n",
"0000-0001-9855-1676 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"0000-0002-0836-2271 maykin warasart NaN NaN \n",
"0000-0001-9855-1676 maykin warasart NaN NaN \n",
"\n",
" primary_email keywords external_ids education \\\n",
"0000-0002-0836-2271 maykin@owasp.org NaN NaN NaN \n",
"0000-0001-9855-1676 maykin@owasp.org NaN NaN NaN \n",
"\n",
" employment n_works works_source \\\n",
"0000-0002-0836-2271 NaN 0 NaN \n",
"0000-0001-9855-1676 NaN 0 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0002-0836-2271 2020-09-15t04:43:55.709z 2020-09-15t05:17:28.509z \n",
"0000-0001-9855-1676 2020-10-23t17:51:51.925z 2021-01-01t15:00:52.053z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0002-0836-2271 0 0 0 0 0 \n",
"0000-0001-9855-1676 0 0 0 0 0 \n",
"\n",
" primary_email_domain other_email_domains url_domains \\\n",
"0000-0002-0836-2271 owasp.org [dga.or.th] NaN \n",
"0000-0001-9855-1676 owasp.org [dga.or.th, ieee.org] NaN \n",
"\n",
" n_emails n_urls n_ids n_keywords n_education \\\n",
"0000-0002-0836-2271 1.0 NaN NaN NaN NaN \n",
"0000-0001-9855-1676 2.0 NaN NaN NaN NaN \n",
"\n",
" n_employment \n",
"0000-0002-0836-2271 NaN \n",
"0000-0001-9855-1676 NaN "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'maykin@owasp.org']"
]
},
{
"cell_type": "code",
"execution_count": 13,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-2232-9638</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-12t13:47:55.549z</td>\n",
" <td>2020-01-27t07:38:24.269z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-0033-0918</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, necmettin erbakan university, konya, , tr,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-10-13t05:47:12.014z</td>\n",
" <td>2020-12-25t13:52:03.976z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>erbakan.edu.tr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-2232-9638 1 1 1 \n",
"0000-0003-0033-0918 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"0000-0002-2232-9638 osman perçin NaN NaN \n",
"0000-0003-0033-0918 osman perçin NaN NaN \n",
"\n",
" primary_email keywords external_ids education \\\n",
"0000-0002-2232-9638 opercin@erbakan.edu.tr NaN NaN NaN \n",
"0000-0003-0033-0918 opercin@erbakan.edu.tr NaN NaN NaN \n",
"\n",
" employment \\\n",
"0000-0002-2232-9638 NaN \n",
"0000-0003-0033-0918 [[, necmettin erbakan university, konya, , tr,... \n",
"\n",
" n_works works_source activation_date \\\n",
"0000-0002-2232-9638 0 NaN 2015-01-12t13:47:55.549z \n",
"0000-0003-0033-0918 0 NaN 2015-10-13t05:47:12.014z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc \\\n",
"0000-0002-2232-9638 2020-01-27t07:38:24.269z 0 0 0 \n",
"0000-0003-0033-0918 2020-12-25t13:52:03.976z 0 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"0000-0002-2232-9638 0 0 erbakan.edu.tr \n",
"0000-0003-0033-0918 0 0 erbakan.edu.tr \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids \\\n",
"0000-0002-2232-9638 NaN NaN NaN NaN NaN \n",
"0000-0003-0033-0918 NaN NaN NaN NaN NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"0000-0002-2232-9638 NaN NaN NaN \n",
"0000-0003-0033-0918 NaN NaN 1.0 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'opercin@erbakan.edu.tr']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-9158-1757</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>[radiochemistry, bioinorganic chemistry, inorg...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[phd student, monash university, melbourne, ,...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2019-05-09t23:01:02.170z</td>\n",
" <td>2019-08-20t03:00:17.844z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8774-0030</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>patrick.davey@monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[phd student, monash university, melbourne, v...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2018-09-11t10:47:10.997z</td>\n",
" <td>2021-02-09t06:21:44.138z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>monash.edu</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-9158-1757 1 1 1 \n",
"0000-0002-8774-0030 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"0000-0002-9158-1757 patrick davey NaN NaN \n",
"0000-0002-8774-0030 patrick davey NaN NaN \n",
"\n",
" primary_email \\\n",
"0000-0002-9158-1757 patrick.davey@monash.edu \n",
"0000-0002-8774-0030 patrick.davey@monash.edu \n",
"\n",
" keywords \\\n",
"0000-0002-9158-1757 [radiochemistry, bioinorganic chemistry, inorg... \n",
"0000-0002-8774-0030 NaN \n",
"\n",
" external_ids education \\\n",
"0000-0002-9158-1757 NaN NaN \n",
"0000-0002-8774-0030 NaN NaN \n",
"\n",
" employment \\\n",
"0000-0002-9158-1757 [[phd student, monash university, melbourne, ,... \n",
"0000-0002-8774-0030 [[phd student, monash university, melbourne, v... \n",
"\n",
" n_works works_source activation_date \\\n",
"0000-0002-9158-1757 0 NaN 2019-05-09t23:01:02.170z \n",
"0000-0002-8774-0030 1 [crossref] 2018-09-11t10:47:10.997z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc \\\n",
"0000-0002-9158-1757 2019-08-20t03:00:17.844z 0 0 0 \n",
"0000-0002-8774-0030 2021-02-09t06:21:44.138z 1 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"0000-0002-9158-1757 0 0 monash.edu \n",
"0000-0002-8774-0030 0 0 monash.edu \n",
"\n",
" other_email_domains url_domains n_emails n_urls n_ids \\\n",
"0000-0002-9158-1757 NaN NaN NaN NaN NaN \n",
"0000-0002-8774-0030 NaN NaN NaN NaN NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"0000-0002-9158-1757 4.0 NaN 1.0 \n",
"0000-0002-8774-0030 NaN NaN 1.0 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['primary_email'] == 'patrick.davey@monash.edu']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 123851\n",
"unique 17089\n",
"top gmail.com\n",
"freq 26540\n",
"Name: primary_email_domain, dtype: object"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email_domain'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" </tr>\n",
" <tr>\n",
" <th>primary_email_domain</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>gmail.com</th>\n",
" <td>26540</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hotmail.com</th>\n",
" <td>3769</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yahoo.com</th>\n",
" <td>2614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163.com</th>\n",
" <td>2109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yuhs.ac</th>\n",
" <td>1132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imean-biotech.com</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imec.msu.ru</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imedea.uib-csic.es</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imes.uni-hannover.de</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>zzuli.edu.cn</th>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>17089 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid\n",
"primary_email_domain \n",
"gmail.com 26540\n",
"hotmail.com 3769\n",
"yahoo.com 2614\n",
"163.com 2109\n",
"yuhs.ac 1132\n",
"... ...\n",
"imean-biotech.com 1\n",
"imec.msu.ru 1\n",
"imedea.uib-csic.es 1\n",
"imes.uni-hannover.de 1\n",
"zzuli.edu.cn 1\n",
"\n",
"[17089 rows x 1 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"top_primary_emails = df[['primary_email_domain', 'orcid']]\\\n",
" .groupby('primary_email_domain')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)\n",
"top_primary_emails"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"163.com",
"yuhs.ac",
"qq.com",
"outlook.com",
"126.com",
"bu.edu",
"usgs.gov",
"mail.ru",
"yahoo.com.br",
"usp.br",
"ua.pt",
"umich.edu",
"ust.hk",
"foxmail.com",
"uomustansiriyah.edu.iq",
"yandex.ru",
"uq.edu.au",
"ukr.net",
"unesp.br",
"ucl.ac.uk",
"ieee.org",
"naver.com",
"st-annes.ox.ac.uk",
"stcatz.ox.ac.uk",
"yahoo.fr",
"ucm.es",
"live.com"
],
"y": [
26540,
3769,
2614,
2109,
1132,
1056,
940,
762,
630,
584,
575,
458,
457,
300,
290,
277,
258,
247,
242,
235,
225,
218,
207,
204,
187,
184,
184,
172,
171,
163
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-30 email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"cf28fbb7-7903-496c-adf0-fc846ff1c434\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"cf28fbb7-7903-496c-adf0-fc846ff1c434\")) { Plotly.newPlot( \"cf28fbb7-7903-496c-adf0-fc846ff1c434\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"163.com\", \"yuhs.ac\", \"qq.com\", \"outlook.com\", \"126.com\", \"bu.edu\", \"usgs.gov\", \"mail.ru\", \"yahoo.com.br\", \"usp.br\", \"ua.pt\", \"umich.edu\", \"ust.hk\", \"foxmail.com\", \"uomustansiriyah.edu.iq\", \"yandex.ru\", \"uq.edu.au\", \"ukr.net\", \"unesp.br\", \"ucl.ac.uk\", \"ieee.org\", \"naver.com\", \"st-annes.ox.ac.uk\", \"stcatz.ox.ac.uk\", \"yahoo.fr\", \"ucm.es\", \"live.com\"], \"y\": [26540, 3769, 2614, 2109, 1132, 1056, 940, 762, 630, 584, 575, 458, 457, 300, 290, 277, 258, 247, 242, 235, 225, 218, 207, 204, 187, 184, 184, 172, 171, 163]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, 
\"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46
" \n",
"var gd = document.getElementById('cf28fbb7-7903-496c-adf0-fc846ff1c434');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=top_primary_emails[:TOP_N].index,\n",
" y=top_primary_emails[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top-%s email domains' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other emails"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-5916-446X</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>antonio gilvan</td>\n",
" <td>teixeira júnior</td>\n",
" <td>NaN</td>\n",
" <td>[teixeira, antônio gilvan, júnior, antonio gil...</td>\n",
" <td>gilvan.junior@aluno.ufca.edu.br</td>\n",
" <td>[ethicis; medicine; infectology; neurology; ne...</td>\n",
" <td>[[scopus author id, 56647743200], [scopus auth...</td>\n",
" <td>[[faculty of health and life sciences, , unive...</td>\n",
" <td>NaN</td>\n",
" <td>14</td>\n",
" <td>[antonio gilvan teixeira júnior, scopus - else...</td>\n",
" <td>2016-05-18t11:26:36.642z</td>\n",
" <td>2016-09-20t18:25:05.728z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>aluno.ufca.edu.br</td>\n",
" <td>[liverpool.ac.uk]</td>\n",
" <td>[researchgate.net, academia.edu, cnpq.br]</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8742-947X</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>aaron</td>\n",
" <td>tan shing loong</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>aaron.tanshingloong@wadh.ox.ac.uk</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[ruskin school of art; wadham college, , univ...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-10-05t23:10:08.771z</td>\n",
" <td>2016-06-14t19:55:50.313z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>wadh.ox.ac.uk</td>\n",
" <td>[rsa.ox.ac.uk]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-9097-2281</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abhishek</td>\n",
" <td>solanki</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[senior engineer, robert bosch (india), benga...</td>\n",
" <td>1</td>\n",
" <td>[abhishek solanki]</td>\n",
" <td>2019-04-22t04:43:06.232z</td>\n",
" <td>2020-07-02t14:18:28.305z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[in.bosch.com]</td>\n",
" <td>[github.com, linkedin.com]</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8614-3007</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adam</td>\n",
" <td>arra</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-11-15t06:33:45.625z</td>\n",
" <td>2017-11-15t06:44:02.998z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[hct.ac.ae]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-9884-5498</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>alberto</td>\n",
" <td>ronzani</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>alberto@aronza.com</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[research scientist, vtt technical research c...</td>\n",
" <td>19</td>\n",
" <td>[crossref metadata search, alberto ronzani, cr...</td>\n",
" <td>2014-04-16t13:21:54.287z</td>\n",
" <td>2020-09-28t15:10:37.439z</td>\n",
" <td>18</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>aronza.com</td>\n",
" <td>[vtt.fi]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-5916-446X 1 1 1 \n",
"0000-0002-8742-947X 1 1 1 \n",
"0000-0001-9097-2281 1 1 1 \n",
"0000-0002-8614-3007 1 1 1 \n",
"0000-0001-9884-5498 1 1 1 \n",
"\n",
" given_names family_name biography \\\n",
"0000-0002-5916-446X antonio gilvan teixeira júnior NaN \n",
"0000-0002-8742-947X aaron tan shing loong NaN \n",
"0000-0001-9097-2281 abhishek solanki NaN \n",
"0000-0002-8614-3007 adam arra NaN \n",
"0000-0001-9884-5498 alberto ronzani NaN \n",
"\n",
" other_names \\\n",
"0000-0002-5916-446X [teixeira, antônio gilvan, júnior, antonio gil... \n",
"0000-0002-8742-947X NaN \n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 NaN \n",
"\n",
" primary_email \\\n",
"0000-0002-5916-446X gilvan.junior@aluno.ufca.edu.br \n",
"0000-0002-8742-947X aaron.tanshingloong@wadh.ox.ac.uk \n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 alberto@aronza.com \n",
"\n",
" keywords \\\n",
"0000-0002-5916-446X [ethicis; medicine; infectology; neurology; ne... \n",
"0000-0002-8742-947X NaN \n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 NaN \n",
"\n",
" external_ids \\\n",
"0000-0002-5916-446X [[scopus author id, 56647743200], [scopus auth... \n",
"0000-0002-8742-947X NaN \n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 NaN \n",
"\n",
" education \\\n",
"0000-0002-5916-446X [[faculty of health and life sciences, , unive... \n",
"0000-0002-8742-947X [[ruskin school of art; wadham college, , univ... \n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 NaN \n",
"\n",
" employment \\\n",
"0000-0002-5916-446X NaN \n",
"0000-0002-8742-947X NaN \n",
"0000-0001-9097-2281 [[senior engineer, robert bosch (india), benga... \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 [[research scientist, vtt technical research c... \n",
"\n",
" n_works \\\n",
"0000-0002-5916-446X 14 \n",
"0000-0002-8742-947X 0 \n",
"0000-0001-9097-2281 1 \n",
"0000-0002-8614-3007 0 \n",
"0000-0001-9884-5498 19 \n",
"\n",
" works_source \\\n",
"0000-0002-5916-446X [antonio gilvan teixeira júnior, scopus - else... \n",
"0000-0002-8742-947X NaN \n",
"0000-0001-9097-2281 [abhishek solanki] \n",
"0000-0002-8614-3007 NaN \n",
"0000-0001-9884-5498 [crossref metadata search, alberto ronzani, cr... \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0002-5916-446X 2016-05-18t11:26:36.642z 2016-09-20t18:25:05.728z \n",
"0000-0002-8742-947X 2015-10-05t23:10:08.771z 2016-06-14t19:55:50.313z \n",
"0000-0001-9097-2281 2019-04-22t04:43:06.232z 2020-07-02t14:18:28.305z \n",
"0000-0002-8614-3007 2017-11-15t06:33:45.625z 2017-11-15t06:44:02.998z \n",
"0000-0001-9884-5498 2014-04-16t13:21:54.287z 2020-09-28t15:10:37.439z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0002-5916-446X 13 0 0 8 0 \n",
"0000-0002-8742-947X 0 0 0 0 0 \n",
"0000-0001-9097-2281 0 0 0 0 0 \n",
"0000-0002-8614-3007 0 0 0 0 0 \n",
"0000-0001-9884-5498 18 0 0 3 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0000-0002-5916-446X aluno.ufca.edu.br [liverpool.ac.uk] \n",
"0000-0002-8742-947X wadh.ox.ac.uk [rsa.ox.ac.uk] \n",
"0000-0001-9097-2281 NaN [in.bosch.com] \n",
"0000-0002-8614-3007 NaN [hct.ac.ae] \n",
"0000-0001-9884-5498 aronza.com [vtt.fi] \n",
"\n",
" url_domains n_emails \\\n",
"0000-0002-5916-446X [researchgate.net, academia.edu, cnpq.br] 1.0 \n",
"0000-0002-8742-947X NaN 1.0 \n",
"0000-0001-9097-2281 [github.com, linkedin.com] 1.0 \n",
"0000-0002-8614-3007 NaN 1.0 \n",
"0000-0001-9884-5498 NaN 1.0 \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"0000-0002-5916-446X 3.0 4.0 1.0 1.0 NaN \n",
"0000-0002-8742-947X NaN NaN NaN 1.0 NaN \n",
"0000-0001-9097-2281 2.0 NaN NaN NaN 2.0 \n",
"0000-0002-8614-3007 NaN NaN NaN NaN NaN \n",
"0000-0001-9884-5498 NaN NaN NaN NaN 1.0 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.other_email_domains.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# Order the profiles by their n_emails value, largest first.\n",
"emails_by_orcid = df.sort_values(by='n_emails', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29
],
"y": [
12,
9,
7,
7,
6,
6,
6,
6,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
4,
4
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 ORCID iDs by email"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"79c196c1-30b5-4f62-9271-4d67295246b8\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"79c196c1-30b5-4f62-9271-4d67295246b8\")) { Plotly.newPlot( \"79c196c1-30b5-4f62-9271-4d67295246b8\", [{\"type\": \"bar\", \"x\": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29], \"y\": [12.0, 9.0, 7.0, 7.0, 6.0, 6.0, 6.0, 6.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": 
[{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2dcontour\"}], \"mesh3d\": [{\"colorbar\": {\"outlin
" \n",
"var gd = document.getElementById('79c196c1-30b5-4f62-9271-4d67295246b8');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=emails_by_orcid[:TOP_N].index,\n",
" y=emails_by_orcid[:TOP_N]['n_emails']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s ORCID iDs by email' % TOP_N, \n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Expand 'other_email_domains' to one row per listed domain, then count the\n",
"# non-null 'orcid' entries per domain and order the domains by that count (descending).\n",
"# The per-domain count ends up in the 'orcid' column of the result.\n",
"top_other_emails = (\n",
"    df[['orcid', 'other_email_domains']]\n",
"    .explode('other_email_domains')\n",
"    .reset_index(drop=True)\n",
"    .groupby('other_email_domains')\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"qq.com",
"163.com",
"outlook.com",
"126.com",
"usp.br",
"ieee.org",
"mail.ru",
"yahoo.com.br",
"unesp.br",
"sbs.ox.ac.uk",
"yuhs.ac",
"naver.com",
"icloud.com",
"foxmail.com",
"uq.edu.au",
"ua.pt",
"cam.ac.uk",
"imperial.ac.uk",
"ukr.net",
"law.ox.ac.uk",
"mit.edu",
"stanford.edu",
"monash.edu",
"ucl.ac.uk",
"education.ox.ac.uk",
"ucm.es",
"conted.ox.ac.uk"
],
"y": [
11116,
1541,
1295,
779,
774,
425,
260,
236,
224,
149,
147,
141,
136,
133,
130,
118,
96,
94,
89,
84,
77,
76,
75,
74,
71,
70,
68,
67,
66,
64
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 other email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"283b86e8-f845-41e4-a929-d9a849ca2e5f\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"283b86e8-f845-41e4-a929-d9a849ca2e5f\")) { Plotly.newPlot( \"283b86e8-f845-41e4-a929-d9a849ca2e5f\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"qq.com\", \"163.com\", \"outlook.com\", \"126.com\", \"usp.br\", \"ieee.org\", \"mail.ru\", \"yahoo.com.br\", \"unesp.br\", \"sbs.ox.ac.uk\", \"yuhs.ac\", \"naver.com\", \"icloud.com\", \"foxmail.com\", \"uq.edu.au\", \"ua.pt\", \"cam.ac.uk\", \"imperial.ac.uk\", \"ukr.net\", \"law.ox.ac.uk\", \"mit.edu\", \"stanford.edu\", \"monash.edu\", \"ucl.ac.uk\", \"education.ox.ac.uk\", \"ucm.es\", \"conted.ox.ac.uk\"], \"y\": [11116, 1541, 1295, 779, 774, 425, 260, 236, 224, 149, 147, 141, 136, 133, 130, 118, 96, 94, 89, 84, 77, 76, 75, 74, 71, 70, 68, 67, 66, 64]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"
" \n",
"var gd = document.getElementById('283b86e8-f845-41e4-a929-d9a849ca2e5f');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(30)\n",
"data = [\n",
" go.Bar(\n",
" x=top_other_emails[:TOP_N].index,\n",
" y=top_other_emails[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s other email domains' % TOP_N, \n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This makes some sense: legitimate users may register a Gmail account as their primary address for login purposes and list their institutional addresses as other email addresses. It also makes life easier upon relocation."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Email speculation"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0001-9097-2281</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abhishek</td>\n",
" <td>solanki</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[senior engineer, robert bosch (india), benga...</td>\n",
" <td>1</td>\n",
" <td>[abhishek solanki]</td>\n",
" <td>2019-04-22t04:43:06.232z</td>\n",
" <td>2020-07-02t14:18:28.305z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[in.bosch.com]</td>\n",
" <td>[github.com, linkedin.com]</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8614-3007</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adam</td>\n",
" <td>arra</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2017-11-15t06:33:45.625z</td>\n",
" <td>2017-11-15t06:44:02.998z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[hct.ac.ae]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-3728-6439</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>alejandra</td>\n",
" <td>echeverry velásquez</td>\n",
" <td>alejandra echeverry is an industrial electrici...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[energy., renewable, science, control, innovat...</td>\n",
" <td>NaN</td>\n",
" <td>[[, electrical engineer, institución universit...</td>\n",
" <td>[[professor, institución universitaria pascual...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2019-03-31t00:00:42.929z</td>\n",
" <td>2020-09-06t02:18:54.290z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[pascualbravo.edu.co]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-8330-7443</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>andrea</td>\n",
" <td>tesoniero</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, d-9056-2015]]</td>\n",
" <td>[[department of geophysics, master of science ...</td>\n",
" <td>[[postdoctoral associate, yale university, new...</td>\n",
" <td>4</td>\n",
" <td>[andrea tesoniero]</td>\n",
" <td>2015-03-09t11:59:06.093z</td>\n",
" <td>2020-08-20t15:03:23.447z</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[yale.edu]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-9670-515X</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>esma esin</td>\n",
" <td>yildirim</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[chemical engineering, pharmacognosy, natural ...</td>\n",
" <td>NaN</td>\n",
" <td>[[business management, master of science, ista...</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-07-26t10:38:03.721z</td>\n",
" <td>2020-07-26t10:52:26.539z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[gmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>3.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-1204-6009</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nathan</td>\n",
" <td>walk</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[department of physics, doctor of philosophy,...</td>\n",
" <td>[[, university of oxford, oxford, oxfordshire,...</td>\n",
" <td>10</td>\n",
" <td>[crossref metadata search]</td>\n",
" <td>2016-07-28t14:24:16.844z</td>\n",
" <td>2020-10-13t11:47:50.621z</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[cs.ox.ac.uk]</td>\n",
" <td>[fu-berlin.de]</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3472-7668</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>raf</td>\n",
" <td>vandevelde</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[chemical engineering technology, master, kat...</td>\n",
" <td>[[phd researcher, katholieke universiteit leuv...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-14t13:56:44.779z</td>\n",
" <td>2020-10-16t14:21:40.673z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[kuleuven.be]</td>\n",
" <td>[linkedin.com]</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-9602-0529</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos augusto</td>\n",
" <td>finelli</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2013-09-16t16:52:06.120z</td>\n",
" <td>2020-12-01t22:47:08.074z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[cecot.com.br]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-4402-5982</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>filipe</td>\n",
" <td>de almeida araújo</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[materials science, msc. materials science, m...</td>\n",
" <td>[[co-owner, aeft acessory, manaus, amazonas, b...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-03-02t20:11:01.699z</td>\n",
" <td>2020-12-04t13:53:39.404z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[ime.eb.br]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-1734-7241</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>manareldeen</td>\n",
" <td>ahmed</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[atomistic simulation, ai chips, thin films, d...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[post-doctor, zhejiang university, hangzhou, ...</td>\n",
" <td>6</td>\n",
" <td>[manareldeen ahmed]</td>\n",
" <td>2017-02-17t13:18:36.540z</td>\n",
" <td>2020-12-04t02:04:36.668z</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[hotmail.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19692 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0001-9097-2281 1 1 1 \n",
"0000-0002-8614-3007 1 1 1 \n",
"0000-0003-3728-6439 1 1 1 \n",
"0000-0001-8330-7443 1 1 1 \n",
"0000-0001-9670-515X 1 1 1 \n",
"... ... ... ... \n",
"0000-0003-1204-6009 1 1 1 \n",
"0000-0002-3472-7668 1 1 1 \n",
"0000-0002-9602-0529 1 1 1 \n",
"0000-0003-4402-5982 1 1 1 \n",
"0000-0002-1734-7241 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0000-0001-9097-2281 abhishek solanki \n",
"0000-0002-8614-3007 adam arra \n",
"0000-0003-3728-6439 alejandra echeverry velásquez \n",
"0000-0001-8330-7443 andrea tesoniero \n",
"0000-0001-9670-515X esma esin yildirim \n",
"... ... ... \n",
"0000-0003-1204-6009 nathan walk \n",
"0000-0002-3472-7668 raf vandevelde \n",
"0000-0002-9602-0529 carlos augusto finelli \n",
"0000-0003-4402-5982 filipe de almeida araújo \n",
"0000-0002-1734-7241 manareldeen ahmed \n",
"\n",
" biography \\\n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0003-3728-6439 alejandra echeverry is an industrial electrici... \n",
"0000-0001-8330-7443 NaN \n",
"0000-0001-9670-515X NaN \n",
"... ... \n",
"0000-0003-1204-6009 NaN \n",
"0000-0002-3472-7668 NaN \n",
"0000-0002-9602-0529 NaN \n",
"0000-0003-4402-5982 NaN \n",
"0000-0002-1734-7241 NaN \n",
"\n",
" other_names primary_email \\\n",
"0000-0001-9097-2281 NaN NaN \n",
"0000-0002-8614-3007 NaN NaN \n",
"0000-0003-3728-6439 NaN NaN \n",
"0000-0001-8330-7443 NaN NaN \n",
"0000-0001-9670-515X NaN NaN \n",
"... ... ... \n",
"0000-0003-1204-6009 NaN NaN \n",
"0000-0002-3472-7668 NaN NaN \n",
"0000-0002-9602-0529 NaN NaN \n",
"0000-0003-4402-5982 NaN NaN \n",
"0000-0002-1734-7241 NaN NaN \n",
"\n",
" keywords \\\n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0003-3728-6439 [energy., renewable, science, control, innovat... \n",
"0000-0001-8330-7443 NaN \n",
"0000-0001-9670-515X [chemical engineering, pharmacognosy, natural ... \n",
"... ... \n",
"0000-0003-1204-6009 NaN \n",
"0000-0002-3472-7668 NaN \n",
"0000-0002-9602-0529 NaN \n",
"0000-0003-4402-5982 NaN \n",
"0000-0002-1734-7241 [atomistic simulation, ai chips, thin films, d... \n",
"\n",
" external_ids \\\n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0003-3728-6439 NaN \n",
"0000-0001-8330-7443 [[researcherid, d-9056-2015]] \n",
"0000-0001-9670-515X NaN \n",
"... ... \n",
"0000-0003-1204-6009 NaN \n",
"0000-0002-3472-7668 NaN \n",
"0000-0002-9602-0529 NaN \n",
"0000-0003-4402-5982 NaN \n",
"0000-0002-1734-7241 NaN \n",
"\n",
" education \\\n",
"0000-0001-9097-2281 NaN \n",
"0000-0002-8614-3007 NaN \n",
"0000-0003-3728-6439 [[, electrical engineer, institución universit... \n",
"0000-0001-8330-7443 [[department of geophysics, master of science ... \n",
"0000-0001-9670-515X [[business management, master of science, ista... \n",
"... ... \n",
"0000-0003-1204-6009 [[department of physics, doctor of philosophy,... \n",
"0000-0002-3472-7668 [[chemical engineering technology, master, kat... \n",
"0000-0002-9602-0529 NaN \n",
"0000-0003-4402-5982 [[materials science, msc. materials science, m... \n",
"0000-0002-1734-7241 NaN \n",
"\n",
" employment \\\n",
"0000-0001-9097-2281 [[senior engineer, robert bosch (india), benga... \n",
"0000-0002-8614-3007 NaN \n",
"0000-0003-3728-6439 [[professor, institución universitaria pascual... \n",
"0000-0001-8330-7443 [[postdoctoral associate, yale university, new... \n",
"0000-0001-9670-515X NaN \n",
"... ... \n",
"0000-0003-1204-6009 [[, university of oxford, oxford, oxfordshire,... \n",
"0000-0002-3472-7668 [[phd researcher, katholieke universiteit leuv... \n",
"0000-0002-9602-0529 NaN \n",
"0000-0003-4402-5982 [[co-owner, aeft acessory, manaus, amazonas, b... \n",
"0000-0002-1734-7241 [[post-doctor, zhejiang university, hangzhou, ... \n",
"\n",
" n_works works_source \\\n",
"0000-0001-9097-2281 1 [abhishek solanki] \n",
"0000-0002-8614-3007 0 NaN \n",
"0000-0003-3728-6439 1 [crossref] \n",
"0000-0001-8330-7443 4 [andrea tesoniero] \n",
"0000-0001-9670-515X 0 NaN \n",
"... ... ... \n",
"0000-0003-1204-6009 10 [crossref metadata search] \n",
"0000-0002-3472-7668 0 NaN \n",
"0000-0002-9602-0529 1 [crossref] \n",
"0000-0003-4402-5982 0 NaN \n",
"0000-0002-1734-7241 6 [manareldeen ahmed] \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0001-9097-2281 2019-04-22t04:43:06.232z 2020-07-02t14:18:28.305z \n",
"0000-0002-8614-3007 2017-11-15t06:33:45.625z 2017-11-15t06:44:02.998z \n",
"0000-0003-3728-6439 2019-03-31t00:00:42.929z 2020-09-06t02:18:54.290z \n",
"0000-0001-8330-7443 2015-03-09t11:59:06.093z 2020-08-20t15:03:23.447z \n",
"0000-0001-9670-515X 2020-07-26t10:38:03.721z 2020-07-26t10:52:26.539z \n",
"... ... ... \n",
"0000-0003-1204-6009 2016-07-28t14:24:16.844z 2020-10-13t11:47:50.621z \n",
"0000-0002-3472-7668 2020-10-14t13:56:44.779z 2020-10-16t14:21:40.673z \n",
"0000-0002-9602-0529 2013-09-16t16:52:06.120z 2020-12-01t22:47:08.074z \n",
"0000-0003-4402-5982 2020-03-02t20:11:01.699z 2020-12-04t13:53:39.404z \n",
"0000-0002-1734-7241 2017-02-17t13:18:36.540z 2020-12-04t02:04:36.668z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0001-9097-2281 0 0 0 0 0 \n",
"0000-0002-8614-3007 0 0 0 0 0 \n",
"0000-0003-3728-6439 1 0 0 0 0 \n",
"0000-0001-8330-7443 4 0 0 2 0 \n",
"0000-0001-9670-515X 0 0 0 0 0 \n",
"... ... ... ... ... ... \n",
"0000-0003-1204-6009 10 0 0 0 0 \n",
"0000-0002-3472-7668 0 0 0 0 0 \n",
"0000-0002-9602-0529 1 0 0 0 0 \n",
"0000-0003-4402-5982 0 0 0 0 0 \n",
"0000-0002-1734-7241 6 0 0 3 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0000-0001-9097-2281 NaN [in.bosch.com] \n",
"0000-0002-8614-3007 NaN [hct.ac.ae] \n",
"0000-0003-3728-6439 NaN [pascualbravo.edu.co] \n",
"0000-0001-8330-7443 NaN [yale.edu] \n",
"0000-0001-9670-515X NaN [gmail.com] \n",
"... ... ... \n",
"0000-0003-1204-6009 NaN [cs.ox.ac.uk] \n",
"0000-0002-3472-7668 NaN [kuleuven.be] \n",
"0000-0002-9602-0529 NaN [cecot.com.br] \n",
"0000-0003-4402-5982 NaN [ime.eb.br] \n",
"0000-0002-1734-7241 NaN [hotmail.com] \n",
"\n",
" url_domains n_emails n_urls n_ids \\\n",
"0000-0001-9097-2281 [github.com, linkedin.com] 1.0 2.0 NaN \n",
"0000-0002-8614-3007 NaN 1.0 NaN NaN \n",
"0000-0003-3728-6439 NaN 1.0 NaN NaN \n",
"0000-0001-8330-7443 NaN 1.0 NaN 1.0 \n",
"0000-0001-9670-515X NaN 1.0 NaN NaN \n",
"... ... ... ... ... \n",
"0000-0003-1204-6009 [fu-berlin.de] 1.0 1.0 NaN \n",
"0000-0002-3472-7668 [linkedin.com] 1.0 1.0 NaN \n",
"0000-0002-9602-0529 NaN 1.0 NaN NaN \n",
"0000-0003-4402-5982 NaN 1.0 NaN NaN \n",
"0000-0002-1734-7241 NaN 1.0 NaN NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"0000-0001-9097-2281 NaN NaN 2.0 \n",
"0000-0002-8614-3007 NaN NaN NaN \n",
"0000-0003-3728-6439 7.0 1.0 1.0 \n",
"0000-0001-8330-7443 NaN 4.0 2.0 \n",
"0000-0001-9670-515X 3.0 3.0 NaN \n",
"... ... ... ... \n",
"0000-0003-1204-6009 NaN 3.0 2.0 \n",
"0000-0002-3472-7668 NaN 2.0 1.0 \n",
"0000-0002-9602-0529 NaN NaN NaN \n",
"0000-0003-4402-5982 NaN 2.0 1.0 \n",
"0000-0002-1734-7241 5.0 NaN 1.0 \n",
"\n",
"[19692 rows x 30 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.primary_email.isna() & df.other_email_domains.notna()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0001-7402-0096</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, kth royal institute of technology, stockho...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2015-01-11t15:13:06.467z</td>\n",
" <td>2016-06-14t23:55:59.896z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[kth.se]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-8377-3508</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[fontana, milena da silva]</td>\n",
" <td>NaN</td>\n",
" <td>[educação; informática; matemática.]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, instituto federal de educação, ciência e t...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-05-23t23:39:04.534z</td>\n",
" <td>2019-10-16t02:50:11.007z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-2638-4108</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>investigador de la universidad de oviedo. depa...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[constitutional law, history of political thou...</td>\n",
" <td>[[scopus author id, 54394231000]]</td>\n",
" <td>[[public law, ph doctor, university of oviedo,...</td>\n",
" <td>[[professor of constitutional law, university ...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2013-03-25t14:38:06.016z</td>\n",
" <td>2020-07-01t13:10:37.025z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unioviedo.es]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-1435-6545</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[migration, culture cell, prostate cancer]</td>\n",
" <td>[[researcherid, p-2223-2018]]</td>\n",
" <td>[[morfologia, , universidade estadual paulista...</td>\n",
" <td>[[, universidade estadual paulista (unesp), in...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2018-08-09t12:12:24.405z</td>\n",
" <td>2020-04-22t01:38:03.184z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, linkedin.com]</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-1284-9741</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>alex percy antonio</td>\n",
" <td>manriquez paisig</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-08t20:04:33.906z</td>\n",
" <td>2020-09-08t20:25:55.432z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0001-7402-0096 1 1 1 \n",
"0000-0001-8377-3508 1 1 1 \n",
"0000-0002-2638-4108 1 1 1 \n",
"0000-0003-1435-6545 1 1 1 \n",
"0000-0003-1284-9741 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0000-0001-7402-0096 NaN NaN \n",
"0000-0001-8377-3508 NaN NaN \n",
"0000-0002-2638-4108 NaN NaN \n",
"0000-0003-1435-6545 NaN NaN \n",
"0000-0003-1284-9741 alex percy antonio manriquez paisig \n",
"\n",
" biography \\\n",
"0000-0001-7402-0096 NaN \n",
"0000-0001-8377-3508 NaN \n",
"0000-0002-2638-4108 investigador de la universidad de oviedo. depa... \n",
"0000-0003-1435-6545 NaN \n",
"0000-0003-1284-9741 NaN \n",
"\n",
" other_names primary_email \\\n",
"0000-0001-7402-0096 NaN NaN \n",
"0000-0001-8377-3508 [fontana, milena da silva] NaN \n",
"0000-0002-2638-4108 NaN NaN \n",
"0000-0003-1435-6545 NaN NaN \n",
"0000-0003-1284-9741 NaN NaN \n",
"\n",
" keywords \\\n",
"0000-0001-7402-0096 NaN \n",
"0000-0001-8377-3508 [educação; informática; matemática.] \n",
"0000-0002-2638-4108 [constitutional law, history of political thou... \n",
"0000-0003-1435-6545 [migration, culture cell, prostate cancer] \n",
"0000-0003-1284-9741 NaN \n",
"\n",
" external_ids \\\n",
"0000-0001-7402-0096 NaN \n",
"0000-0001-8377-3508 NaN \n",
"0000-0002-2638-4108 [[scopus author id, 54394231000]] \n",
"0000-0003-1435-6545 [[researcherid, p-2223-2018]] \n",
"0000-0003-1284-9741 NaN \n",
"\n",
" education \\\n",
"0000-0001-7402-0096 NaN \n",
"0000-0001-8377-3508 NaN \n",
"0000-0002-2638-4108 [[public law, ph doctor, university of oviedo,... \n",
"0000-0003-1435-6545 [[morfologia, , universidade estadual paulista... \n",
"0000-0003-1284-9741 NaN \n",
"\n",
" employment \\\n",
"0000-0001-7402-0096 [[, kth royal institute of technology, stockho... \n",
"0000-0001-8377-3508 [[, instituto federal de educação, ciência e t... \n",
"0000-0002-2638-4108 [[professor of constitutional law, university ... \n",
"0000-0003-1435-6545 [[, universidade estadual paulista (unesp), in... \n",
"0000-0003-1284-9741 NaN \n",
"\n",
" n_works works_source activation_date \\\n",
"0000-0001-7402-0096 0 NaN 2015-01-11t15:13:06.467z \n",
"0000-0001-8377-3508 0 NaN 2018-05-23t23:39:04.534z \n",
"0000-0002-2638-4108 1 [crossref] 2013-03-25t14:38:06.016z \n",
"0000-0003-1435-6545 0 NaN 2018-08-09t12:12:24.405z \n",
"0000-0003-1284-9741 0 NaN 2020-09-08t20:04:33.906z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc \\\n",
"0000-0001-7402-0096 2016-06-14t23:55:59.896z 0 0 0 \n",
"0000-0001-8377-3508 2019-10-16t02:50:11.007z 0 0 0 \n",
"0000-0002-2638-4108 2020-07-01t13:10:37.025z 1 0 0 \n",
"0000-0003-1435-6545 2020-04-22t01:38:03.184z 0 0 0 \n",
"0000-0003-1284-9741 2020-09-08t20:25:55.432z 0 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"0000-0001-7402-0096 0 0 NaN \n",
"0000-0001-8377-3508 0 0 NaN \n",
"0000-0002-2638-4108 0 0 NaN \n",
"0000-0003-1435-6545 0 0 NaN \n",
"0000-0003-1284-9741 0 0 NaN \n",
"\n",
" other_email_domains url_domains n_emails \\\n",
"0000-0001-7402-0096 NaN [kth.se] NaN \n",
"0000-0001-8377-3508 NaN [cnpq.br] NaN \n",
"0000-0002-2638-4108 NaN [unioviedo.es] NaN \n",
"0000-0003-1435-6545 NaN [cnpq.br, linkedin.com] NaN \n",
"0000-0003-1284-9741 NaN [youtube.com] NaN \n",
"\n",
" n_urls n_ids n_keywords n_education n_employment \n",
"0000-0001-7402-0096 1.0 NaN NaN NaN 1.0 \n",
"0000-0001-8377-3508 1.0 NaN 1.0 NaN 3.0 \n",
"0000-0002-2638-4108 1.0 1.0 3.0 1.0 1.0 \n",
"0000-0003-1435-6545 2.0 1.0 3.0 1.0 1.0 \n",
"0000-0003-1284-9741 1.0 NaN NaN NaN NaN "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df.url_domains.notna()].head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-1234-835X</th>\n",
" <td>1</td>\n",
" <td>219.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-7478-4539</th>\n",
" <td>1</td>\n",
" <td>174.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-7392-3792</th>\n",
" <td>1</td>\n",
" <td>169.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-6938-9638</th>\n",
" <td>1</td>\n",
" <td>152.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-5710-4041</th>\n",
" <td>1</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-1686-1935</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3800-6331</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8783-5814</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-7584-2283</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-0529-3538</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10916574 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_urls\n",
"0000-0002-1234-835X 1 219.0\n",
"0000-0001-7478-4539 1 174.0\n",
"0000-0002-7392-3792 1 169.0\n",
"0000-0002-6938-9638 1 152.0\n",
"0000-0002-5710-4041 1 114.0\n",
"... ... ...\n",
"0000-0002-1686-1935 1 NaN\n",
"0000-0002-3800-6331 1 NaN\n",
"0000-0002-8783-5814 1 NaN\n",
"0000-0002-7584-2283 1 NaN\n",
"0000-0003-0529-3538 1 NaN\n",
"\n",
"[10916574 rows x 2 columns]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"urls_by_orcid = df.sort_values('n_urls', ascending=False)[['orcid', 'n_urls']]\n",
"urls_by_orcid"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0002-1234-835X",
"0000-0001-7478-4539",
"0000-0002-7392-3792",
"0000-0002-6938-9638",
"0000-0002-5710-4041",
"0000-0003-2450-090X",
"0000-0002-3920-7389",
"0000-0002-6689-4129",
"0000-0001-5384-0001",
"0000-0002-4621-5571",
"0000-0002-7754-8889",
"0000-0001-9131-1266",
"0000-0002-9025-8632",
"0000-0002-5250-1144",
"0000-0003-0321-7339",
"0000-0003-0176-1293",
"0000-0002-7456-3848",
"0000-0002-8493-0402",
"0000-0002-9965-2425",
"0000-0001-8873-6677",
"0000-0002-3997-5070",
"0000-0002-1856-6905",
"0000-0002-4062-3603",
"0000-0002-4316-1467",
"0000-0003-1524-6268",
"0000-0002-0752-7513",
"0000-0001-5880-7091",
"0000-0003-0594-2462",
"0000-0002-1298-5252",
"0000-0003-2593-7134",
"0000-0003-2383-8386",
"0000-0003-1761-3842",
"0000-0003-3546-2312",
"0000-0002-2886-9248",
"0000-0003-4948-9268",
"0000-0003-2183-8112",
"0000-0002-1929-6054",
"0000-0003-2407-3557",
"0000-0002-7568-3403",
"0000-0003-0796-0234",
"0000-0002-9276-6921",
"0000-0002-4305-4215",
"0000-0003-1484-6958",
"0000-0001-7133-6896",
"0000-0002-4004-6666",
"0000-0002-8208-0897",
"0000-0003-0930-6121",
"0000-0003-4993-5555",
"0000-0002-9071-5450",
"0000-0002-8116-9611",
"0000-0002-3277-9659",
"0000-0001-9559-1103",
"0000-0002-8122-879X",
"0000-0002-2000-8339",
"0000-0003-2862-6315",
"0000-0002-6547-0172",
"0000-0003-4808-6619",
"0000-0002-6254-8683",
"0000-0002-5139-2660",
"0000-0001-5300-4601",
"0000-0002-0971-9375",
"0000-0003-3933-0229",
"0000-0003-1585-1134",
"0000-0003-0694-1154",
"0000-0002-4659-5391",
"0000-0001-6783-2037",
"0000-0001-6461-2573",
"0000-0003-4501-3756",
"0000-0002-2916-2893",
"0000-0001-5549-6822",
"0000-0003-4326-9336",
"0000-0001-8096-4333",
"0000-0002-8940-3177",
"0000-0001-8978-4830",
"0000-0002-6680-1703",
"0000-0002-8593-9257",
"0000-0002-5946-1595",
"0000-0002-7653-4899",
"0000-0002-5196-4905",
"0000-0003-1904-4188",
"0000-0001-8808-4867",
"0000-0001-6921-0426",
"0000-0003-1815-1993",
"0000-0002-7843-8497",
"0000-0003-1675-2840",
"0000-0001-8644-2114",
"0000-0001-8986-2528",
"0000-0001-7784-0583",
"0000-0003-0907-9870",
"0000-0002-5265-6074",
"0000-0001-7550-5802",
"0000-0002-7179-6953",
"0000-0002-3334-9386",
"0000-0001-9102-8639",
"0000-0002-0696-8560",
"0000-0001-6979-4273",
"0000-0001-7193-5039",
"0000-0001-6714-009X",
"0000-0002-9771-600X",
"0000-0001-7608-9433"
],
"y": [
219,
174,
169,
152,
114,
114,
111,
104,
104,
90,
83,
83,
81,
81,
80,
80,
80,
76,
73,
72,
71,
70,
69,
69,
68,
68,
68,
68,
67,
67,
66,
66,
65,
64,
61,
61,
61,
59,
57,
57,
57,
57,
57,
57,
57,
56,
55,
55,
55,
55,
50,
50,
50,
49,
49,
48,
48,
48,
48,
48,
47,
47,
46,
46,
46,
45,
45,
45,
45,
44,
43,
43,
43,
43,
42,
42,
42,
41,
41,
41,
40,
40,
39,
39,
39,
39,
38,
38,
38,
38,
38,
37,
37,
37,
37,
37,
36,
36,
36,
36
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 100 ORCID iDs with URLs"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"0e607391-8ec7-4259-853c-155913efe159\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"0e607391-8ec7-4259-853c-155913efe159\")) { Plotly.newPlot( \"0e607391-8ec7-4259-853c-155913efe159\", [{\"type\": \"bar\", \"x\": [\"0000-0002-1234-835X\", \"0000-0001-7478-4539\", \"0000-0002-7392-3792\", \"0000-0002-6938-9638\", \"0000-0002-5710-4041\", \"0000-0003-2450-090X\", \"0000-0002-3920-7389\", \"0000-0002-6689-4129\", \"0000-0001-5384-0001\", \"0000-0002-4621-5571\", \"0000-0002-7754-8889\", \"0000-0001-9131-1266\", \"0000-0002-9025-8632\", \"0000-0002-5250-1144\", \"0000-0003-0321-7339\", \"0000-0003-0176-1293\", \"0000-0002-7456-3848\", \"0000-0002-8493-0402\", \"0000-0002-9965-2425\", \"0000-0001-8873-6677\", \"0000-0002-3997-5070\", \"0000-0002-1856-6905\", \"0000-0002-4062-3603\", \"0000-0002-4316-1467\", \"0000-0003-1524-6268\", \"0000-0002-0752-7513\", \"0000-0001-5880-7091\", \"0000-0003-0594-2462\", \"0000-0002-1298-5252\", \"0000-0003-2593-7134\", \"0000-0003-2383-8386\", \"0000-0003-1761-3842\", \"0000-0003-3546-2312\", \"0000-0002-2886-9248\", \"0000-0003-4948-9268\", \"0000-0003-2183-8112\", \"0000-0002-1929-6054\", \"0000-0003-2407-3557\", \"0000-0002-7568-3403\", \"0000-0003-0796-0234\", \"0000-0002-9276-6921\", \"0000-0002-4305-4215\", \"0000-0003-1484-6958\", \"0000-0001-7133-6896\", \"0000-0002-4004-6666\", \"0000-0002-8208-0897\", \"0000-0003-0930-6121\", \"0000-0003-4993-5555\", \"0000-0002-9071-5450\", \"0000-0002-8116-9611\", \"0000-0002-3277-9659\", \"0000-0001-9559-1103\", \"0000-0002-8122-879X\", \"0000-0002-2000-8339\", \"0000-0003-2862-6315\", \"0000-0002-6547-0172\", \"0000-0003-4808-6619\", \"0000-0002-6254-8683\", \"0000-0002-5139-2660\", \"0000-0001-5300-4601\", \"0000-0002-0971-9375\", \"0000-0003-3933-0229\", \"0000-0003-1585-1134\", 
\"0000-0003-0694-1154\", \"0000-0002-4659-5391\", \"0000-0001-6783-2037\", \"0000-0001-6461-2573\", \"0000-0003-4501-3756\", \"0000-0002-2916-2893\", \"0000-0001-5549-6822\", \"0000-0003-4326-9336\", \"0000-0001-8096-4333\", \"0000-0002-8940-3177\", \"0000-0001-8978-4830\", \"0000-0002-6680-1703\", \"0000-0002-8593-9257\", \"0000-0002-5946-1595\", \"0000-0002-7653-4899\", \"0000-0002-5196-4905\", \"0000-0003-1904-4188\", \"0000-0001-8808-4867\", \"0000-0001-6921-0426\", \"0000-0003-1815-1993\", \"0000-0002-7843-8497\", \"0000-0003-1675-2840\", \"0000-0001-8644-2114\", \"0000-0001-8986-2528\", \"0000-0001-7784-0583\", \"0000-0003-0907-9870\", \"0000-0002-5265-6074\", \"0000-0001-7550-5802\", \"0000-0002-7179-6953\", \"0000-0002-3334-9386\", \"0000-0001-9102-8639\", \"0000-0002-0696-8560\", \"0000-0001-6979-4273\", \"0000-0001-7193-5039\", \"0000-0001-6714-009X\", \"0000-0002-9771-600X\", \"0000-0001-7608-9433\"], \"y\": [219.0, 174.0, 169.0, 152.0, 114.0, 114.0, 111.0, 104.0, 104.0, 90.0, 83.0, 83.0, 81.0, 81.0, 80.0, 80.0, 80.0, 76.0, 73.0, 72.0, 71.0, 70.0, 69.0, 69.0, 68.0, 68.0, 68.0, 68.0, 67.0, 67.0, 66.0, 66.0, 65.0, 64.0, 61.0, 61.0, 61.0, 59.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 56.0, 55.0, 55.0, 55.0, 55.0, 50.0, 50.0, 50.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 45.0, 45.0, 45.0, 44.0, 43.0, 43.0, 43.0, 43.0, 42.0, 42.0, 42.0, 41.0, 41.0, 41.0, 40.0, 40.0, 39.0, 39.0, 39.0, 39.0, 38.0, 38.0, 38.0, 38.0, 38.0, 37.0, 37.0, 37.0, 37.0, 37.0, 36.0, 36.0, 36.0, 36.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"
" \n",
"var gd = document.getElementById('0e607391-8ec7-4259-853c-155913efe159');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(100)\n",
"data = [\n",
" go.Bar(\n",
" x=urls_by_orcid[:TOP_N].index,\n",
" y=urls_by_orcid[:TOP_N]['n_urls']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top %s ORCID iDs with URLs' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Count the rows that mention each URL domain, most frequent first.\n",
"# explode() yields one row per (orcid, domain) pair, so a domain repeated\n",
"# inside one profile's list is counted once per occurrence.\n",
"# The result is indexed by domain, with the count in the 'orcid' column.\n",
"# NOTE(review): the chart stored in the next cell shows an empty-string domain\n",
"# bucket; presumably URLs whose domain could not be extracted -- confirm.\n",
"top_urls = (\n",
"    df[['orcid', 'url_domains']]\n",
"    .explode('url_domains')\n",
"    .reset_index(drop=True)\n",
"    .groupby('url_domains')\n",
"    .count()\n",
"    .sort_values(by='orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"linkedin.com",
"researchgate.net",
"google.com",
"cnpq.br",
"academia.edu",
"twitter.com",
"facebook.com",
"publons.com",
"wordpress.com",
"mendeley.com",
"instagram.com",
"github.io",
"google.com.ua",
"blogspot.com",
"github.com",
"google.es",
"helsinki.fi",
"unirioja.es",
"youtube.com",
"wixsite.com",
"ku.dk",
"scopus.com",
"",
"weebly.com",
"us.es",
"kth.se",
"cityu.edu.hk",
"kcl.ac.uk",
"au.dk",
"man.ac.uk",
"google.com.au",
"ucl.ac.uk",
"sdu.dk",
"ugr.es",
"researcherid.com",
"mq.edu.au",
"ntu.edu.tw",
"rug.nl",
"colciencias.gov.co",
"google.co.in",
"dtu.dk",
"bris.ac.uk",
"uwa.edu.au",
"bu.edu",
"uc3m.es",
"vub.be",
"monash.edu",
"google.co.uk",
"aau.dk",
"lancs.ac.uk"
],
"y": [
77558,
67357,
44397,
24439,
21054,
18771,
15121,
10622,
8996,
6978,
5881,
5479,
5335,
5240,
5199,
5134,
4711,
4572,
4396,
4120,
3756,
3558,
3494,
3115,
3034,
2952,
2793,
2720,
2717,
2693,
2606,
2585,
2465,
2224,
2133,
2131,
2093,
1940,
1927,
1904,
1880,
1838,
1808,
1805,
1803,
1788,
1772,
1652,
1652,
1648
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-50 URL domains"
},
"xaxis": {
"range": [
-0.5,
49.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"c6500b12-96cc-4ab3-bca2-b00545d9a385\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"c6500b12-96cc-4ab3-bca2-b00545d9a385\")) { Plotly.newPlot( \"c6500b12-96cc-4ab3-bca2-b00545d9a385\", [{\"type\": \"bar\", \"x\": [\"linkedin.com\", \"researchgate.net\", \"google.com\", \"cnpq.br\", \"academia.edu\", \"twitter.com\", \"facebook.com\", \"publons.com\", \"wordpress.com\", \"mendeley.com\", \"instagram.com\", \"github.io\", \"google.com.ua\", \"blogspot.com\", \"github.com\", \"google.es\", \"helsinki.fi\", \"unirioja.es\", \"youtube.com\", \"wixsite.com\", \"ku.dk\", \"scopus.com\", \"\", \"weebly.com\", \"us.es\", \"kth.se\", \"cityu.edu.hk\", \"kcl.ac.uk\", \"au.dk\", \"man.ac.uk\", \"google.com.au\", \"ucl.ac.uk\", \"sdu.dk\", \"ugr.es\", \"researcherid.com\", \"mq.edu.au\", \"ntu.edu.tw\", \"rug.nl\", \"colciencias.gov.co\", \"google.co.in\", \"dtu.dk\", \"bris.ac.uk\", \"uwa.edu.au\", \"bu.edu\", \"uc3m.es\", \"vub.be\", \"monash.edu\", \"google.co.uk\", \"aau.dk\", \"lancs.ac.uk\"], \"y\": [77558, 67357, 44397, 24439, 21054, 18771, 15121, 10622, 8996, 6978, 5881, 5479, 5335, 5240, 5199, 5134, 4711, 4572, 4396, 4120, 3756, 3558, 3494, 3115, 3034, 2952, 2793, 2720, 2717, 2693, 2606, 2585, 2465, 2224, 2133, 2131, 2093, 1940, 1927, 1904, 1880, 1838, 1808, 1805, 1803, 1788, 1772, 1652, 1652, 1648]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, 
\"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"
" \n",
"var gd = document.getElementById('c6500b12-96cc-4ab3-bca2-b00545d9a385');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"set_top_n(50)\n",
"data = [\n",
" go.Bar(\n",
" x=top_urls[:TOP_N].index,\n",
" y=top_urls[:TOP_N]['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top-%s URL domains' % TOP_N,\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs speculation"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-5710-4041</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ryszard</td>\n",
" <td>romaniuk</td>\n",
" <td>professor of electronics and communications en...</td>\n",
" <td>[r.romaniuk, r.s.romaniuk, ryszard romaniuk, r...</td>\n",
" <td>rrom@ise.pw.edu.pl</td>\n",
" <td>[measurement systems, telecommunications, rese...</td>\n",
" <td>[[isni, 0000000071432485], [researcherid, b-91...</td>\n",
" <td>[[faculty of electronics and information techn...</td>\n",
" <td>[[professor, institute director, politechnika ...</td>\n",
" <td>5008</td>\n",
" <td>[inspire-hep, researcherid, isni2orcid search ...</td>\n",
" <td>2013-01-20t12:09:21.600z</td>\n",
" <td>2021-03-11t20:57:13.284z</td>\n",
" <td>1221</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>1742</td>\n",
" <td>0</td>\n",
" <td>ise.pw.edu.pl</td>\n",
" <td>[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]</td>\n",
" <td>[google.pl, publons.com, scopus.com, mendeley....</td>\n",
" <td>3.0</td>\n",
" <td>114.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-1929-6054</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>franklin américo</td>\n",
" <td>canaza choque</td>\n",
" <td>docente-investigador social. maestrando en der...</td>\n",
" <td>[franklin américo canaza-choque , franklin a. ...</td>\n",
" <td>leo_123fa@hotmail.com</td>\n",
" <td>[justicia global; democracia; derechos humanos...</td>\n",
" <td>[[researcherid, p-8613-2018], [loop profile, 8...</td>\n",
" <td>[[facultad de ciencias de la educación , maest...</td>\n",
" <td>[[investigador social, universidad católica de...</td>\n",
" <td>38</td>\n",
" <td>[researcherid, base - bielefeld academic searc...</td>\n",
" <td>2017-09-15t19:45:43.483z</td>\n",
" <td>2021-03-14t20:20:21.282z</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>0</td>\n",
" <td>hotmail.com</td>\n",
" <td>[gmail.com, gmail.com, hotmail.com, baldwin.ed...</td>\n",
" <td>[concytec.gob.pe, redalyc.org, redalyc.org, un...</td>\n",
" <td>5.0</td>\n",
" <td>61.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-2407-3557</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abdul</td>\n",
" <td>aziz</td>\n",
" <td>abdul aziz was born on may 25, 1973, in brebes...</td>\n",
" <td>[abdul aziz, aziz, abdul, aziz, a., aziz, abd,...</td>\n",
" <td>NaN</td>\n",
" <td>[etika bisnis islam, ekonomi islam, ilmu ekono...</td>\n",
" <td>NaN</td>\n",
" <td>[[ilmu ekonomi, dr, universitas borobudur, jak...</td>\n",
" <td>[[assisten professor/dr, institut agama islam ...</td>\n",
" <td>72</td>\n",
" <td>[base - bielefeld academic search engine, abdu...</td>\n",
" <td>2016-09-12t04:41:24.842z</td>\n",
" <td>2021-01-26t11:58:33.039z</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>77</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, syekhnurjati.ac.id, orcid.org, bl...</td>\n",
" <td>NaN</td>\n",
" <td>59.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3997-5070</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>dr. parameshachari</td>\n",
" <td>b d</td>\n",
" <td>dr. parameshachari b dacm distinguished speake...</td>\n",
" <td>[dr. parameshachari b d]</td>\n",
" <td>NaN</td>\n",
" <td>[honorary secretary| iete mysuru centre, profe...</td>\n",
" <td>[[researcherid, f-7045-2018], [scopus author i...</td>\n",
" <td>[[electronics and communication engineering, p...</td>\n",
" <td>[[acm distinguished speaker (volunteer), assoc...</td>\n",
" <td>93</td>\n",
" <td>[publons, multidisciplinary digital publishing...</td>\n",
" <td>2016-08-24t11:00:30.403z</td>\n",
" <td>2021-03-14t07:11:09.817z</td>\n",
" <td>47</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>48</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[geethashishu.in, geethashishu.in, acm.org, go...</td>\n",
" <td>NaN</td>\n",
" <td>71.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" <td>10.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-2450-090X</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>eduard</td>\n",
" <td>babulak</td>\n",
" <td>professor eduard babulak is accomplished inter...</td>\n",
" <td>[professor eduard babulak]</td>\n",
" <td>NaN</td>\n",
" <td>[internet of things, computer networking, inte...</td>\n",
" <td>[[scopus author id, 6506867432], [researcherid...</td>\n",
" <td>[[information technology, doctor habilitated (...</td>\n",
" <td>[[consultant, horizon 2020 framework programme...</td>\n",
" <td>274</td>\n",
" <td>[the lens, base - bielefeld academic search en...</td>\n",
" <td>2013-04-03t08:02:30.013z</td>\n",
" <td>2021-02-28t10:07:13.231z</td>\n",
" <td>199</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>174</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[worldassessmentcouncil.org, spseke.sk, bcs.or...</td>\n",
" <td>NaN</td>\n",
" <td>114.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>6.0</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-2593-7134</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>aan</td>\n",
" <td>jaelani</td>\n",
" <td>all my papers can be downloaded from portal:re...</td>\n",
" <td>[jaelani, a., jaelani, aan]</td>\n",
" <td>aan_jaelani@syekhnurjati.ac.id</td>\n",
" <td>[islamic economics, history of islamic economi...</td>\n",
" <td>[[scopus author id, 57195963463], [loop profil...</td>\n",
" <td>[[post graduate, s3/dr, universitas islam nege...</td>\n",
" <td>[[dr, institut agama islam negeri syekh nurjat...</td>\n",
" <td>79</td>\n",
" <td>[publons, aan jaelani, scopus - elsevier, dime...</td>\n",
" <td>2016-03-02t18:37:44.989z</td>\n",
" <td>2021-03-08t03:42:22.593z</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>193</td>\n",
" <td>0</td>\n",
" <td>syekhnurjati.ac.id</td>\n",
" <td>[gmail.com]</td>\n",
" <td>[microsoft.com, twitter.com, academia.edu, aca...</td>\n",
" <td>1.0</td>\n",
" <td>67.0</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3920-7389</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>а.</td>\n",
" <td>гусев</td>\n",
" <td>surname, name gusev alexander leonidovichdate...</td>\n",
" <td>[alexander l. gusev , alexander leonidovich gu...</td>\n",
" <td>NaN</td>\n",
" <td>[technologies of isotope separation, 3d - prin...</td>\n",
" <td>[[researcherid, f-8048-2014], [scopus author i...</td>\n",
" <td>[[chemical technology and cryogenic-vacuum tec...</td>\n",
" <td>[[general director, scientific technical centr...</td>\n",
" <td>472</td>\n",
" <td>[publons, datacite, scopus - elsevier, a.l. gu...</td>\n",
" <td>2014-05-14t00:01:28.030z</td>\n",
" <td>2021-01-16t13:44:14.134z</td>\n",
" <td>37</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com, isjaee.com, researchgate.net, re...</td>\n",
" <td>NaN</td>\n",
" <td>111.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-4948-9268</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>gustavo</td>\n",
" <td>duperré</td>\n",
" <td>gustavo norberto duperré graduated in arts and...</td>\n",
" <td>[gustavo norberto duperré, duperré, g. n., gus...</td>\n",
" <td>gustavo.duperre@usal.edu.ar</td>\n",
" <td>[computer science, medieval and modern history...</td>\n",
" <td>[[scopus author id, 57195936346], [researcheri...</td>\n",
" <td>[[programme in history, history of art and ter...</td>\n",
" <td>[[titular professor, dirección general de cult...</td>\n",
" <td>41</td>\n",
" <td>[gustavo duperré, scopus - elsevier, publons, ...</td>\n",
" <td>2020-02-22t15:49:52.386z</td>\n",
" <td>2021-03-12t15:13:44.065z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>usal.edu.ar</td>\n",
" <td>NaN</td>\n",
" <td>[icomos.ro, unirioja.es, unirioja.es, unc.edu....</td>\n",
" <td>NaN</td>\n",
" <td>61.0</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>6.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-2183-8112</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>pelayo munhoz</td>\n",
" <td>olea</td>\n",
" <td>pós-doutorado em gestão ambiental pela univers...</td>\n",
" <td>[ munhoz, pelayo olea, olea, pelayo, olea, p...</td>\n",
" <td>NaN</td>\n",
" <td>[inovação, empreendedorismo, sustentabilidade]</td>\n",
" <td>[[scopus author id, 55175503300], [researcheri...</td>\n",
" <td>[[, postdoctoral in environmental sustainabili...</td>\n",
" <td>[[professor, universidade federal do rio grand...</td>\n",
" <td>1108</td>\n",
" <td>[the lens, pelayo munhoz olea, dimensions, bas...</td>\n",
" <td>2013-02-04t17:25:34.723z</td>\n",
" <td>2021-03-10t14:05:17.770z</td>\n",
" <td>797</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>582</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...</td>\n",
" <td>NaN</td>\n",
" <td>61.0</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>7.0</td>\n",
" <td>9.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-6938-9638</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adolfo</td>\n",
" <td>catral sanabria</td>\n",
" <td>my education is in computer science, mathemati...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[loop profile, 747193]]</td>\n",
" <td>[[education, capacitación para la enseñanza en...</td>\n",
" <td>NaN</td>\n",
" <td>2023</td>\n",
" <td>[base - bielefeld academic search engine, data...</td>\n",
" <td>2019-05-07t19:27:02.210z</td>\n",
" <td>2020-12-10t23:39:15.236z</td>\n",
" <td>2022</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[researchgate.net, youtube.com, linkedin.com, ...</td>\n",
" <td>NaN</td>\n",
" <td>152.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-9025-8632</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>buycannabis</td>\n",
" <td>dispensary</td>\n",
" <td>we procure and deliver premium cannabis strain...</td>\n",
" <td>[we procure and deliver premium cannabis strai...</td>\n",
" <td>NaN</td>\n",
" <td>[cannabis community, cannabis culture, marijua...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>10</td>\n",
" <td>[goowonderland dispensary]</td>\n",
" <td>2020-12-09t21:19:46.004z</td>\n",
" <td>2020-12-10t01:17:28.772z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[goowonderland.com, goowonderland.com, goowond...</td>\n",
" <td>NaN</td>\n",
" <td>81.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-9965-2425</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>jaroslaw</td>\n",
" <td>spychala</td>\n",
" <td>jaroslaw spychala has received a doctoral degr...</td>\n",
" <td>[jaroslaw jozef spychala]</td>\n",
" <td>NaN</td>\n",
" <td>[photochemistry, organic chemistry, biochemist...</td>\n",
" <td>[[scopus author id, 7006745874]]</td>\n",
" <td>[[department of chemistry, postdoctoral associ...</td>\n",
" <td>[[assistant professor, adam mickiewicz univers...</td>\n",
" <td>29</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2014-09-18t12:34:14.242z</td>\n",
" <td>2020-02-11t14:31:25.544z</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biowebspin.com, biowebspin.com, google.com, l...</td>\n",
" <td>NaN</td>\n",
" <td>73.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>4.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-4062-3603</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>juan de dios</td>\n",
" <td>beltrán mancilla</td>\n",
" <td>juan de dios beltrán mancilla (*) filósofo aut...</td>\n",
" <td>[juan de dios beltrán mancilla, filósofo autod...</td>\n",
" <td>NaN</td>\n",
" <td>[filosofia medicina arquitectura economía dere...</td>\n",
" <td>NaN</td>\n",
" <td>[[, diplomado en practicas directivas para or...</td>\n",
" <td>[[inspector general jornada vespertina // de 2...</td>\n",
" <td>11</td>\n",
" <td>[juan de dios beltr´´án mancilla]</td>\n",
" <td>2020-04-19t21:06:33.495z</td>\n",
" <td>2021-02-10t20:13:07.698z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[yumpu.com, ijopm.org, google.com, blogspot.co...</td>\n",
" <td>NaN</td>\n",
" <td>69.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-5710-4041 1 1 1 \n",
"0000-0002-1929-6054 1 1 1 \n",
"0000-0003-2407-3557 1 1 1 \n",
"0000-0002-3997-5070 1 1 1 \n",
"0000-0003-2450-090X 1 1 1 \n",
"0000-0003-2593-7134 1 1 1 \n",
"0000-0002-3920-7389 1 1 1 \n",
"0000-0003-4948-9268 1 1 1 \n",
"0000-0003-2183-8112 1 1 1 \n",
"0000-0002-6938-9638 1 1 1 \n",
"0000-0002-9025-8632 1 1 1 \n",
"0000-0002-9965-2425 1 1 1 \n",
"0000-0002-4062-3603 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0000-0002-5710-4041 ryszard romaniuk \n",
"0000-0002-1929-6054 franklin américo canaza choque \n",
"0000-0003-2407-3557 abdul aziz \n",
"0000-0002-3997-5070 dr. parameshachari b d \n",
"0000-0003-2450-090X eduard babulak \n",
"0000-0003-2593-7134 aan jaelani \n",
"0000-0002-3920-7389 а. гусев \n",
"0000-0003-4948-9268 gustavo duperré \n",
"0000-0003-2183-8112 pelayo munhoz olea \n",
"0000-0002-6938-9638 adolfo catral sanabria \n",
"0000-0002-9025-8632 buycannabis dispensary \n",
"0000-0002-9965-2425 jaroslaw spychala \n",
"0000-0002-4062-3603 juan de dios beltrán mancilla \n",
"\n",
" biography \\\n",
"0000-0002-5710-4041 professor of electronics and communications en... \n",
"0000-0002-1929-6054 docente-investigador social. maestrando en der... \n",
"0000-0003-2407-3557 abdul aziz was born on may 25, 1973, in brebes... \n",
"0000-0002-3997-5070 dr. parameshachari b dacm distinguished speake... \n",
"0000-0003-2450-090X professor eduard babulak is accomplished inter... \n",
"0000-0003-2593-7134 all my papers can be downloaded from portal:re... \n",
"0000-0002-3920-7389 surname, name gusev alexander leonidovichdate... \n",
"0000-0003-4948-9268 gustavo norberto duperré graduated in arts and... \n",
"0000-0003-2183-8112 pós-doutorado em gestão ambiental pela univers... \n",
"0000-0002-6938-9638 my education is in computer science, mathemati... \n",
"0000-0002-9025-8632 we procure and deliver premium cannabis strain... \n",
"0000-0002-9965-2425 jaroslaw spychala has received a doctoral degr... \n",
"0000-0002-4062-3603 juan de dios beltrán mancilla (*) filósofo aut... \n",
"\n",
" other_names \\\n",
"0000-0002-5710-4041 [r.romaniuk, r.s.romaniuk, ryszard romaniuk, r... \n",
"0000-0002-1929-6054 [franklin américo canaza-choque , franklin a. ... \n",
"0000-0003-2407-3557 [abdul aziz, aziz, abdul, aziz, a., aziz, abd,... \n",
"0000-0002-3997-5070 [dr. parameshachari b d] \n",
"0000-0003-2450-090X [professor eduard babulak] \n",
"0000-0003-2593-7134 [jaelani, a., jaelani, aan] \n",
"0000-0002-3920-7389 [alexander l. gusev , alexander leonidovich gu... \n",
"0000-0003-4948-9268 [gustavo norberto duperré, duperré, g. n., gus... \n",
"0000-0003-2183-8112 [ munhoz, pelayo olea, olea, pelayo, olea, p... \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 [we procure and deliver premium cannabis strai... \n",
"0000-0002-9965-2425 [jaroslaw jozef spychala] \n",
"0000-0002-4062-3603 [juan de dios beltrán mancilla, filósofo autod... \n",
"\n",
" primary_email \\\n",
"0000-0002-5710-4041 rrom@ise.pw.edu.pl \n",
"0000-0002-1929-6054 leo_123fa@hotmail.com \n",
"0000-0003-2407-3557 NaN \n",
"0000-0002-3997-5070 NaN \n",
"0000-0003-2450-090X NaN \n",
"0000-0003-2593-7134 aan_jaelani@syekhnurjati.ac.id \n",
"0000-0002-3920-7389 NaN \n",
"0000-0003-4948-9268 gustavo.duperre@usal.edu.ar \n",
"0000-0003-2183-8112 NaN \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 NaN \n",
"0000-0002-4062-3603 NaN \n",
"\n",
" keywords \\\n",
"0000-0002-5710-4041 [measurement systems, telecommunications, rese... \n",
"0000-0002-1929-6054 [justicia global; democracia; derechos humanos... \n",
"0000-0003-2407-3557 [etika bisnis islam, ekonomi islam, ilmu ekono... \n",
"0000-0002-3997-5070 [honorary secretary| iete mysuru centre, profe... \n",
"0000-0003-2450-090X [internet of things, computer networking, inte... \n",
"0000-0003-2593-7134 [islamic economics, history of islamic economi... \n",
"0000-0002-3920-7389 [technologies of isotope separation, 3d - prin... \n",
"0000-0003-4948-9268 [computer science, medieval and modern history... \n",
"0000-0003-2183-8112 [inovação, empreendedorismo, sustentabilidade] \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 [cannabis community, cannabis culture, marijua... \n",
"0000-0002-9965-2425 [photochemistry, organic chemistry, biochemist... \n",
"0000-0002-4062-3603 [filosofia medicina arquitectura economía dere... \n",
"\n",
" external_ids \\\n",
"0000-0002-5710-4041 [[isni, 0000000071432485], [researcherid, b-91... \n",
"0000-0002-1929-6054 [[researcherid, p-8613-2018], [loop profile, 8... \n",
"0000-0003-2407-3557 NaN \n",
"0000-0002-3997-5070 [[researcherid, f-7045-2018], [scopus author i... \n",
"0000-0003-2450-090X [[scopus author id, 6506867432], [researcherid... \n",
"0000-0003-2593-7134 [[scopus author id, 57195963463], [loop profil... \n",
"0000-0002-3920-7389 [[researcherid, f-8048-2014], [scopus author i... \n",
"0000-0003-4948-9268 [[scopus author id, 57195936346], [researcheri... \n",
"0000-0003-2183-8112 [[scopus author id, 55175503300], [researcheri... \n",
"0000-0002-6938-9638 [[loop profile, 747193]] \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 [[scopus author id, 7006745874]] \n",
"0000-0002-4062-3603 NaN \n",
"\n",
" education \\\n",
"0000-0002-5710-4041 [[faculty of electronics and information techn... \n",
"0000-0002-1929-6054 [[facultad de ciencias de la educación , maest... \n",
"0000-0003-2407-3557 [[ilmu ekonomi, dr, universitas borobudur, jak... \n",
"0000-0002-3997-5070 [[electronics and communication engineering, p... \n",
"0000-0003-2450-090X [[information technology, doctor habilitated (... \n",
"0000-0003-2593-7134 [[post graduate, s3/dr, universitas islam nege... \n",
"0000-0002-3920-7389 [[chemical technology and cryogenic-vacuum tec... \n",
"0000-0003-4948-9268 [[programme in history, history of art and ter... \n",
"0000-0003-2183-8112 [[, postdoctoral in environmental sustainabili... \n",
"0000-0002-6938-9638 [[education, capacitación para la enseñanza en... \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 [[department of chemistry, postdoctoral associ... \n",
"0000-0002-4062-3603 [[, diplomado en practicas directivas para or... \n",
"\n",
" employment \\\n",
"0000-0002-5710-4041 [[professor, institute director, politechnika ... \n",
"0000-0002-1929-6054 [[investigador social, universidad católica de... \n",
"0000-0003-2407-3557 [[assisten professor/dr, institut agama islam ... \n",
"0000-0002-3997-5070 [[acm distinguished speaker (volunteer), assoc... \n",
"0000-0003-2450-090X [[consultant, horizon 2020 framework programme... \n",
"0000-0003-2593-7134 [[dr, institut agama islam negeri syekh nurjat... \n",
"0000-0002-3920-7389 [[general director, scientific technical centr... \n",
"0000-0003-4948-9268 [[titular professor, dirección general de cult... \n",
"0000-0003-2183-8112 [[professor, universidade federal do rio grand... \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 [[assistant professor, adam mickiewicz univers... \n",
"0000-0002-4062-3603 [[inspector general jornada vespertina // de 2... \n",
"\n",
" n_works \\\n",
"0000-0002-5710-4041 5008 \n",
"0000-0002-1929-6054 38 \n",
"0000-0003-2407-3557 72 \n",
"0000-0002-3997-5070 93 \n",
"0000-0003-2450-090X 274 \n",
"0000-0003-2593-7134 79 \n",
"0000-0002-3920-7389 472 \n",
"0000-0003-4948-9268 41 \n",
"0000-0003-2183-8112 1108 \n",
"0000-0002-6938-9638 2023 \n",
"0000-0002-9025-8632 10 \n",
"0000-0002-9965-2425 29 \n",
"0000-0002-4062-3603 11 \n",
"\n",
" works_source \\\n",
"0000-0002-5710-4041 [inspire-hep, researcherid, isni2orcid search ... \n",
"0000-0002-1929-6054 [researcherid, base - bielefeld academic searc... \n",
"0000-0003-2407-3557 [base - bielefeld academic search engine, abdu... \n",
"0000-0002-3997-5070 [publons, multidisciplinary digital publishing... \n",
"0000-0003-2450-090X [the lens, base - bielefeld academic search en... \n",
"0000-0003-2593-7134 [publons, aan jaelani, scopus - elsevier, dime... \n",
"0000-0002-3920-7389 [publons, datacite, scopus - elsevier, a.l. gu... \n",
"0000-0003-4948-9268 [gustavo duperré, scopus - elsevier, publons, ... \n",
"0000-0003-2183-8112 [the lens, pelayo munhoz olea, dimensions, bas... \n",
"0000-0002-6938-9638 [base - bielefeld academic search engine, data... \n",
"0000-0002-9025-8632 [goowonderland dispensary] \n",
"0000-0002-9965-2425 [scopus - elsevier] \n",
"0000-0002-4062-3603 [juan de dios beltr´´án mancilla] \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0002-5710-4041 2013-01-20t12:09:21.600z 2021-03-11t20:57:13.284z \n",
"0000-0002-1929-6054 2017-09-15t19:45:43.483z 2021-03-14t20:20:21.282z \n",
"0000-0003-2407-3557 2016-09-12t04:41:24.842z 2021-01-26t11:58:33.039z \n",
"0000-0002-3997-5070 2016-08-24t11:00:30.403z 2021-03-14t07:11:09.817z \n",
"0000-0003-2450-090X 2013-04-03t08:02:30.013z 2021-02-28t10:07:13.231z \n",
"0000-0003-2593-7134 2016-03-02t18:37:44.989z 2021-03-08t03:42:22.593z \n",
"0000-0002-3920-7389 2014-05-14t00:01:28.030z 2021-01-16t13:44:14.134z \n",
"0000-0003-4948-9268 2020-02-22t15:49:52.386z 2021-03-12t15:13:44.065z \n",
"0000-0003-2183-8112 2013-02-04t17:25:34.723z 2021-03-10t14:05:17.770z \n",
"0000-0002-6938-9638 2019-05-07t19:27:02.210z 2020-12-10t23:39:15.236z \n",
"0000-0002-9025-8632 2020-12-09t21:19:46.004z 2020-12-10t01:17:28.772z \n",
"0000-0002-9965-2425 2014-09-18t12:34:14.242z 2020-02-11t14:31:25.544z \n",
"0000-0002-4062-3603 2020-04-19t21:06:33.495z 2021-02-10t20:13:07.698z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0002-5710-4041 1221 25 0 1742 0 \n",
"0000-0002-1929-6054 29 0 0 33 0 \n",
"0000-0003-2407-3557 19 0 0 77 0 \n",
"0000-0002-3997-5070 47 0 0 48 0 \n",
"0000-0003-2450-090X 199 0 1 174 0 \n",
"0000-0003-2593-7134 88 0 0 193 0 \n",
"0000-0002-3920-7389 37 0 0 21 0 \n",
"0000-0003-4948-9268 13 0 0 34 0 \n",
"0000-0003-2183-8112 797 0 1 582 0 \n",
"0000-0002-6938-9638 2022 0 0 16 0 \n",
"0000-0002-9025-8632 0 0 0 0 0 \n",
"0000-0002-9965-2425 15 0 0 29 0 \n",
"0000-0002-4062-3603 0 0 0 7 0 \n",
"\n",
" primary_email_domain \\\n",
"0000-0002-5710-4041 ise.pw.edu.pl \n",
"0000-0002-1929-6054 hotmail.com \n",
"0000-0003-2407-3557 NaN \n",
"0000-0002-3997-5070 NaN \n",
"0000-0003-2450-090X NaN \n",
"0000-0003-2593-7134 syekhnurjati.ac.id \n",
"0000-0002-3920-7389 NaN \n",
"0000-0003-4948-9268 usal.edu.ar \n",
"0000-0003-2183-8112 NaN \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 NaN \n",
"0000-0002-4062-3603 NaN \n",
"\n",
" other_email_domains \\\n",
"0000-0002-5710-4041 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] \n",
"0000-0002-1929-6054 [gmail.com, gmail.com, hotmail.com, baldwin.ed... \n",
"0000-0003-2407-3557 NaN \n",
"0000-0002-3997-5070 NaN \n",
"0000-0003-2450-090X NaN \n",
"0000-0003-2593-7134 [gmail.com] \n",
"0000-0002-3920-7389 NaN \n",
"0000-0003-4948-9268 NaN \n",
"0000-0003-2183-8112 NaN \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 NaN \n",
"0000-0002-4062-3603 NaN \n",
"\n",
" url_domains \\\n",
"0000-0002-5710-4041 [google.pl, publons.com, scopus.com, mendeley.... \n",
"0000-0002-1929-6054 [concytec.gob.pe, redalyc.org, redalyc.org, un... \n",
"0000-0003-2407-3557 [google.com, syekhnurjati.ac.id, orcid.org, bl... \n",
"0000-0002-3997-5070 [geethashishu.in, geethashishu.in, acm.org, go... \n",
"0000-0003-2450-090X [worldassessmentcouncil.org, spseke.sk, bcs.or... \n",
"0000-0003-2593-7134 [microsoft.com, twitter.com, academia.edu, aca... \n",
"0000-0002-3920-7389 [youtube.com, isjaee.com, researchgate.net, re... \n",
"0000-0003-4948-9268 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... \n",
"0000-0003-2183-8112 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... \n",
"0000-0002-6938-9638 [researchgate.net, youtube.com, linkedin.com, ... \n",
"0000-0002-9025-8632 [goowonderland.com, goowonderland.com, goowond... \n",
"0000-0002-9965-2425 [biowebspin.com, biowebspin.com, google.com, l... \n",
"0000-0002-4062-3603 [yumpu.com, ijopm.org, google.com, blogspot.co... \n",
"\n",
" n_emails n_urls n_ids n_keywords n_education \\\n",
"0000-0002-5710-4041 3.0 114.0 3.0 5.0 1.0 \n",
"0000-0002-1929-6054 5.0 61.0 4.0 2.0 1.0 \n",
"0000-0003-2407-3557 NaN 59.0 NaN 4.0 3.0 \n",
"0000-0002-3997-5070 NaN 71.0 3.0 6.0 5.0 \n",
"0000-0003-2450-090X NaN 114.0 5.0 8.0 6.0 \n",
"0000-0003-2593-7134 1.0 67.0 4.0 7.0 2.0 \n",
"0000-0002-3920-7389 NaN 111.0 2.0 16.0 2.0 \n",
"0000-0003-4948-9268 NaN 61.0 2.0 11.0 6.0 \n",
"0000-0003-2183-8112 NaN 61.0 2.0 3.0 7.0 \n",
"0000-0002-6938-9638 NaN 152.0 1.0 NaN 6.0 \n",
"0000-0002-9025-8632 NaN 81.0 NaN 7.0 NaN \n",
"0000-0002-9965-2425 NaN 73.0 1.0 4.0 4.0 \n",
"0000-0002-4062-3603 NaN 69.0 NaN 1.0 8.0 \n",
"\n",
" n_employment \n",
"0000-0002-5710-4041 1.0 \n",
"0000-0002-1929-6054 1.0 \n",
"0000-0003-2407-3557 1.0 \n",
"0000-0002-3997-5070 10.0 \n",
"0000-0003-2450-090X 22.0 \n",
"0000-0003-2593-7134 1.0 \n",
"0000-0002-3920-7389 7.0 \n",
"0000-0003-4948-9268 5.0 \n",
"0000-0003-2183-8112 9.0 \n",
"0000-0002-6938-9638 NaN \n",
"0000-0002-9025-8632 NaN \n",
"0000-0002-9965-2425 2.0 \n",
"0000-0002-4062-3603 6.0 "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['url_domains'].str.len() > 50) & (df['n_works'] > 0)]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-3505-2797</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>[nurul malahayati]</td>\n",
" <td>2017-10-01t00:46:31.324z</td>\n",
" <td>2019-08-19t15:52:47.253z</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-3670-9620</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>NaN</td>\n",
" <td>[gearturbine, mechanical, power, innovation, t...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>[carlos barrera]</td>\n",
" <td>2016-08-29t20:32:10.362z</td>\n",
" <td>2021-02-09t04:56:35.554z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>NaN</td>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-5441-0465</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>NaN</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>NaN</td>\n",
" <td>[marketing, research, human resources, busines...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>[nuria hernández-león]</td>\n",
" <td>2015-11-28t07:18:58.442z</td>\n",
" <td>2021-03-05t16:37:47.403z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>19.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-7781-6767</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>mohd nazri</td>\n",
" <td>ismail</td>\n",
" <td>born in penang, malaysia in 1971, dr. mohd had...</td>\n",
" <td>[ndum (national defence university of malaysia)]</td>\n",
" <td>NaN</td>\n",
" <td>[network communication, manet, wsn, network se...</td>\n",
" <td>[[scopus author id, 24372977800], [researcheri...</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, universiti pertahanan nasional mal...</td>\n",
" <td>35</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2016-09-06t02:25:52.974z</td>\n",
" <td>2020-10-20t06:55:55.051z</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com.my, researchgate.net, academia.edu...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-7010-2908</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>clara</td>\n",
" <td>sarmento</td>\n",
" <td>clara sarmento holds an aggregation in cultura...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[ethnography, tourism and business, anglo-amer...</td>\n",
" <td>[[ciência id, d418-d6f8-7d49]]</td>\n",
" <td>[[ao abrigo da bolsa santander ie best practic...</td>\n",
" <td>[[presidente da comissão de acreditação do nov...</td>\n",
" <td>275</td>\n",
" <td>[clara sarmento]</td>\n",
" <td>2013-12-12t00:33:58.190z</td>\n",
" <td>2020-10-12t14:43:00.749z</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[iscap.pt, google.pt, academia.edu, researchga...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-9446-9496</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>jesús</td>\n",
" <td>portillo-fernández</td>\n",
" <td>ba in philosophy, ba in humanities, ph.d. in p...</td>\n",
" <td>[jesús portillo fernández, portillo-fernández,...</td>\n",
" <td>NaN</td>\n",
" <td>[absurdo, lingüística, pragmática, filosofía d...</td>\n",
" <td>[[scopus author id, 55229372800]]</td>\n",
" <td>[[, doctor en filología, universidad de sevill...</td>\n",
" <td>[[, grupo de investigación en lógica, lenguaje...</td>\n",
" <td>35</td>\n",
" <td>[jesús portillo-fernández]</td>\n",
" <td>2015-03-08t20:37:16.590z</td>\n",
" <td>2021-03-12t22:05:28.976z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[us.es, us.es, us.es, google.es, microsoft.com...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-0579-5829</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ángel</td>\n",
" <td>carrión-tavárez</td>\n",
" <td>NaN</td>\n",
" <td>[ángel carrión tavárez, á carrión tavárez, ác ...</td>\n",
" <td>NaN</td>\n",
" <td>[editing and publishing, sociomusicology, geog...</td>\n",
" <td>[[loop profile, 687295]]</td>\n",
" <td>[[integration and economic and territorial dev...</td>\n",
" <td>[[director, university of puerto rico at río p...</td>\n",
" <td>132</td>\n",
" <td>[ángel carrión-tavárez]</td>\n",
" <td>2017-12-30t19:25:41.566z</td>\n",
" <td>2021-03-13t23:21:59.069z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[academia.edu, redalyc.org, directorioexit.inf...</td>\n",
" <td>NaN</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-8960-9004</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>susan</td>\n",
" <td>bastani</td>\n",
" <td>NaN</td>\n",
" <td>[s. bastani, سوسن باستانی]</td>\n",
" <td>sbastani@alzahra.ac.ir</td>\n",
" <td>[social networks, online and offline communiti...</td>\n",
" <td>[[scopus author id, 16642098400]]</td>\n",
" <td>[[sociology, ph.d., university of toronto, tor...</td>\n",
" <td>[[professor, alzahra university, tehran, vanak...</td>\n",
" <td>20</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2019-07-10t06:50:46.255z</td>\n",
" <td>2020-10-07t04:08:01.961z</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>0</td>\n",
" <td>alzahra.ac.ir</td>\n",
" <td>[gmail.com, gmail.com]</td>\n",
" <td>[scopus.com, google.com, publons.com, zenodo.o...</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-4379-6454</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[applied economics, financial economics, appli...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>[caroline wanjiru kariuki]</td>\n",
" <td>2020-03-18t10:18:04.007z</td>\n",
" <td>2021-02-11t14:40:38.515z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-2311-0600</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>NaN</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>NaN</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>[myo kyaw hlaing]</td>\n",
" <td>2018-12-26t12:51:57.801z</td>\n",
" <td>2021-01-26t14:36:47.421z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>141 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-3505-2797 1 1 1 \n",
"0000-0003-3670-9620 1 1 1 \n",
"0000-0002-5441-0465 1 1 1 \n",
"0000-0002-7781-6767 1 1 1 \n",
"0000-0001-7010-2908 1 1 1 \n",
"... ... ... ... \n",
"0000-0002-9446-9496 1 1 1 \n",
"0000-0003-0579-5829 1 1 1 \n",
"0000-0001-8960-9004 1 1 1 \n",
"0000-0002-4379-6454 1 1 1 \n",
"0000-0003-2311-0600 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0000-0002-3505-2797 nurul malahayati \n",
"0000-0003-3670-9620 carlos barrera \n",
"0000-0002-5441-0465 nuria hernández-león \n",
"0000-0002-7781-6767 mohd nazri ismail \n",
"0000-0001-7010-2908 clara sarmento \n",
"... ... ... \n",
"0000-0002-9446-9496 jesús portillo-fernández \n",
"0000-0003-0579-5829 ángel carrión-tavárez \n",
"0000-0001-8960-9004 susan bastani \n",
"0000-0002-4379-6454 caroline wanjiru kariuki \n",
"0000-0003-2311-0600 myo kyaw hlaing \n",
"\n",
" biography \\\n",
"0000-0002-3505-2797 google scholar \n",
"0000-0003-3670-9620 im individual inventor, and this is my work; s... \n",
"0000-0002-5441-0465 NaN \n",
"0000-0002-7781-6767 born in penang, malaysia in 1971, dr. mohd had... \n",
"0000-0001-7010-2908 clara sarmento holds an aggregation in cultura... \n",
"... ... \n",
"0000-0002-9446-9496 ba in philosophy, ba in humanities, ph.d. in p... \n",
"0000-0003-0579-5829 NaN \n",
"0000-0001-8960-9004 NaN \n",
"0000-0002-4379-6454 caroline holds a phd in economics from curtin ... \n",
"0000-0003-2311-0600 NaN \n",
"\n",
" other_names \\\n",
"0000-0002-3505-2797 NaN \n",
"0000-0003-3670-9620 [retrodynamic, novelinflow] \n",
"0000-0002-5441-0465 [nuria h. león, nuria hernández león, hernánde... \n",
"0000-0002-7781-6767 [ndum (national defence university of malaysia)] \n",
"0000-0001-7010-2908 NaN \n",
"... ... \n",
"0000-0002-9446-9496 [jesús portillo fernández, portillo-fernández,... \n",
"0000-0003-0579-5829 [ángel carrión tavárez, á carrión tavárez, ác ... \n",
"0000-0001-8960-9004 [s. bastani, سوسن باستانی] \n",
"0000-0002-4379-6454 NaN \n",
"0000-0003-2311-0600 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"0000-0002-3505-2797 NaN \n",
"0000-0003-3670-9620 NaN \n",
"0000-0002-5441-0465 NaN \n",
"0000-0002-7781-6767 NaN \n",
"0000-0001-7010-2908 NaN \n",
"... ... \n",
"0000-0002-9446-9496 NaN \n",
"0000-0003-0579-5829 NaN \n",
"0000-0001-8960-9004 sbastani@alzahra.ac.ir \n",
"0000-0002-4379-6454 NaN \n",
"0000-0003-2311-0600 NaN \n",
"\n",
" keywords \\\n",
"0000-0002-3505-2797 NaN \n",
"0000-0003-3670-9620 [gearturbine, mechanical, power, innovation, t... \n",
"0000-0002-5441-0465 [marketing, research, human resources, busines... \n",
"0000-0002-7781-6767 [network communication, manet, wsn, network se... \n",
"0000-0001-7010-2908 [ethnography, tourism and business, anglo-amer... \n",
"... ... \n",
"0000-0002-9446-9496 [absurdo, lingüística, pragmática, filosofía d... \n",
"0000-0003-0579-5829 [editing and publishing, sociomusicology, geog... \n",
"0000-0001-8960-9004 [social networks, online and offline communiti... \n",
"0000-0002-4379-6454 [applied economics, financial economics, appli... \n",
"0000-0003-2311-0600 [economic geology] \n",
"\n",
" external_ids \\\n",
"0000-0002-3505-2797 [[researcherid, q-3861-2017]] \n",
"0000-0003-3670-9620 [[loop profile, 394457]] \n",
"0000-0002-5441-0465 NaN \n",
"0000-0002-7781-6767 [[scopus author id, 24372977800], [researcheri... \n",
"0000-0001-7010-2908 [[ciência id, d418-d6f8-7d49]] \n",
"... ... \n",
"0000-0002-9446-9496 [[scopus author id, 55229372800]] \n",
"0000-0003-0579-5829 [[loop profile, 687295]] \n",
"0000-0001-8960-9004 [[scopus author id, 16642098400]] \n",
"0000-0002-4379-6454 NaN \n",
"0000-0003-2311-0600 NaN \n",
"\n",
" education \\\n",
"0000-0002-3505-2797 [[civil and transportation engineering , maste... \n",
"0000-0003-3670-9620 NaN \n",
"0000-0002-5441-0465 [[, course: social skills, university of salam... \n",
"0000-0002-7781-6767 NaN \n",
"0000-0001-7010-2908 [[ao abrigo da bolsa santander ie best practic... \n",
"... ... \n",
"0000-0002-9446-9496 [[, doctor en filología, universidad de sevill... \n",
"0000-0003-0579-5829 [[integration and economic and territorial dev... \n",
"0000-0001-8960-9004 [[sociology, ph.d., university of toronto, tor... \n",
"0000-0002-4379-6454 [[economics, doctor of philosophy , curtin uni... \n",
"0000-0003-2311-0600 NaN \n",
"\n",
" employment \\\n",
"0000-0002-3505-2797 [[senior lecturer, universitas syiah kuala, ba... \n",
"0000-0003-3670-9620 NaN \n",
"0000-0002-5441-0465 [[merchandise reception and expedition trainer... \n",
"0000-0002-7781-6767 [[lecturer, universiti pertahanan nasional mal... \n",
"0000-0001-7010-2908 [[presidente da comissão de acreditação do nov... \n",
"... ... \n",
"0000-0002-9446-9496 [[, grupo de investigación en lógica, lenguaje... \n",
"0000-0003-0579-5829 [[director, university of puerto rico at río p... \n",
"0000-0001-8960-9004 [[professor, alzahra university, tehran, vanak... \n",
"0000-0002-4379-6454 [[director, educational development, strathmor... \n",
"0000-0003-2311-0600 [[lecturer, union of myanmar ministry of educa... \n",
"\n",
" n_works works_source \\\n",
"0000-0002-3505-2797 6 [nurul malahayati] \n",
"0000-0003-3670-9620 1 [carlos barrera] \n",
"0000-0002-5441-0465 11 [nuria hernández-león] \n",
"0000-0002-7781-6767 35 [scopus - elsevier] \n",
"0000-0001-7010-2908 275 [clara sarmento] \n",
"... ... ... \n",
"0000-0002-9446-9496 35 [jesús portillo-fernández] \n",
"0000-0003-0579-5829 132 [ángel carrión-tavárez] \n",
"0000-0001-8960-9004 20 [scopus - elsevier] \n",
"0000-0002-4379-6454 4 [caroline wanjiru kariuki] \n",
"0000-0003-2311-0600 2 [myo kyaw hlaing] \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0002-3505-2797 2017-10-01t00:46:31.324z 2019-08-19t15:52:47.253z \n",
"0000-0003-3670-9620 2016-08-29t20:32:10.362z 2021-02-09t04:56:35.554z \n",
"0000-0002-5441-0465 2015-11-28t07:18:58.442z 2021-03-05t16:37:47.403z \n",
"0000-0002-7781-6767 2016-09-06t02:25:52.974z 2020-10-20t06:55:55.051z \n",
"0000-0001-7010-2908 2013-12-12t00:33:58.190z 2020-10-12t14:43:00.749z \n",
"... ... ... \n",
"0000-0002-9446-9496 2015-03-08t20:37:16.590z 2021-03-12t22:05:28.976z \n",
"0000-0003-0579-5829 2017-12-30t19:25:41.566z 2021-03-13t23:21:59.069z \n",
"0000-0001-8960-9004 2019-07-10t06:50:46.255z 2020-10-07t04:08:01.961z \n",
"0000-0002-4379-6454 2020-03-18t10:18:04.007z 2021-02-11t14:40:38.515z \n",
"0000-0003-2311-0600 2018-12-26t12:51:57.801z 2021-01-26t14:36:47.421z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0002-3505-2797 3 0 0 3 0 \n",
"0000-0003-3670-9620 0 0 0 0 0 \n",
"0000-0002-5441-0465 1 0 0 4 0 \n",
"0000-0002-7781-6767 24 0 0 35 0 \n",
"0000-0001-7010-2908 17 0 0 60 0 \n",
"... ... ... ... ... ... \n",
"0000-0002-9446-9496 0 0 0 0 0 \n",
"0000-0003-0579-5829 13 0 0 28 0 \n",
"0000-0001-8960-9004 19 0 0 33 0 \n",
"0000-0002-4379-6454 1 0 0 0 0 \n",
"0000-0003-2311-0600 1 0 0 2 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0000-0002-3505-2797 NaN NaN \n",
"0000-0003-3670-9620 NaN NaN \n",
"0000-0002-5441-0465 NaN NaN \n",
"0000-0002-7781-6767 NaN NaN \n",
"0000-0001-7010-2908 NaN NaN \n",
"... ... ... \n",
"0000-0002-9446-9496 NaN NaN \n",
"0000-0003-0579-5829 NaN NaN \n",
"0000-0001-8960-9004 alzahra.ac.ir [gmail.com, gmail.com] \n",
"0000-0002-4379-6454 NaN NaN \n",
"0000-0003-2311-0600 NaN NaN \n",
"\n",
" url_domains \\\n",
"0000-0002-3505-2797 [google.com, ristekdikti.go.id, unsyiah.ac.id,... \n",
"0000-0003-3670-9620 [blogspot.mx, behance.net, authorstream.com, d... \n",
"0000-0002-5441-0465 [feriaempresamujer.com, escueladenegociosydire... \n",
"0000-0002-7781-6767 [google.com.my, researchgate.net, academia.edu... \n",
"0000-0001-7010-2908 [iscap.pt, google.pt, academia.edu, researchga... \n",
"... ... \n",
"0000-0002-9446-9496 [us.es, us.es, us.es, google.es, microsoft.com... \n",
"0000-0003-0579-5829 [academia.edu, redalyc.org, directorioexit.inf... \n",
"0000-0001-8960-9004 [scopus.com, google.com, publons.com, zenodo.o... \n",
"0000-0002-4379-6454 [scopus.com, mendeley.com, publons.com, resear... \n",
"0000-0003-2311-0600 [facebook.com, linkedin.com, instagram.com, re... \n",
"\n",
" n_emails n_urls n_ids n_keywords n_education \\\n",
"0000-0002-3505-2797 NaN 16.0 1.0 NaN 2.0 \n",
"0000-0003-3670-9620 NaN 24.0 1.0 8.0 NaN \n",
"0000-0002-5441-0465 NaN 16.0 NaN 7.0 19.0 \n",
"0000-0002-7781-6767 NaN 16.0 2.0 10.0 NaN \n",
"0000-0001-7010-2908 NaN 13.0 1.0 6.0 8.0 \n",
"... ... ... ... ... ... \n",
"0000-0002-9446-9496 NaN 12.0 1.0 5.0 5.0 \n",
"0000-0003-0579-5829 NaN 11.0 1.0 6.0 4.0 \n",
"0000-0001-8960-9004 2.0 11.0 1.0 4.0 3.0 \n",
"0000-0002-4379-6454 NaN 13.0 NaN 4.0 3.0 \n",
"0000-0003-2311-0600 NaN 12.0 NaN 1.0 NaN \n",
"\n",
" n_employment \n",
"0000-0002-3505-2797 1.0 \n",
"0000-0003-3670-9620 NaN \n",
"0000-0002-5441-0465 16.0 \n",
"0000-0002-7781-6767 4.0 \n",
"0000-0001-7010-2908 37.0 \n",
"... ... \n",
"0000-0002-9446-9496 1.0 \n",
"0000-0003-0579-5829 3.0 \n",
"0000-0001-8960-9004 4.0 \n",
"0000-0002-4379-6454 6.0 \n",
"0000-0003-2311-0600 2.0 \n",
"\n",
"[141 rows x 30 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>nurul malahayati</td>\n",
" <td>2017-10-01t00:46:31.324z</td>\n",
" <td>2019-08-19t15:52:47.253z</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>NaN</td>\n",
" <td>[gearturbine, mechanical, power, innovation, t...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>carlos barrera</td>\n",
" <td>2016-08-29t20:32:10.362z</td>\n",
" <td>2021-02-09t04:56:35.554z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>NaN</td>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>NaN</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>NaN</td>\n",
" <td>[marketing, research, human resources, busines...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>nuria hernández-león</td>\n",
" <td>2015-11-28t07:18:58.442z</td>\n",
" <td>2021-03-05t16:37:47.403z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>19.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>mohd nazri</td>\n",
" <td>ismail</td>\n",
" <td>born in penang, malaysia in 1971, dr. mohd had...</td>\n",
" <td>[ndum (national defence university of malaysia)]</td>\n",
" <td>NaN</td>\n",
" <td>[network communication, manet, wsn, network se...</td>\n",
" <td>[[scopus author id, 24372977800], [researcheri...</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, universiti pertahanan nasional mal...</td>\n",
" <td>35</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2016-09-06t02:25:52.974z</td>\n",
" <td>2020-10-20t06:55:55.051z</td>\n",
" <td>24</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com.my, researchgate.net, academia.edu...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>2.0</td>\n",
" <td>10.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>clara</td>\n",
" <td>sarmento</td>\n",
" <td>clara sarmento holds an aggregation in cultura...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[ethnography, tourism and business, anglo-amer...</td>\n",
" <td>[[ciência id, d418-d6f8-7d49]]</td>\n",
" <td>[[ao abrigo da bolsa santander ie best practic...</td>\n",
" <td>[[presidente da comissão de acreditação do nov...</td>\n",
" <td>275</td>\n",
" <td>clara sarmento</td>\n",
" <td>2013-12-12t00:33:58.190z</td>\n",
" <td>2020-10-12t14:43:00.749z</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[iscap.pt, google.pt, academia.edu, researchga...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>jesús</td>\n",
" <td>portillo-fernández</td>\n",
" <td>ba in philosophy, ba in humanities, ph.d. in p...</td>\n",
" <td>[jesús portillo fernández, portillo-fernández,...</td>\n",
" <td>NaN</td>\n",
" <td>[absurdo, lingüística, pragmática, filosofía d...</td>\n",
" <td>[[scopus author id, 55229372800]]</td>\n",
" <td>[[, doctor en filología, universidad de sevill...</td>\n",
" <td>[[, grupo de investigación en lógica, lenguaje...</td>\n",
" <td>35</td>\n",
" <td>jesús portillo-fernández</td>\n",
" <td>2015-03-08t20:37:16.590z</td>\n",
" <td>2021-03-12t22:05:28.976z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[us.es, us.es, us.es, google.es, microsoft.com...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ángel</td>\n",
" <td>carrión-tavárez</td>\n",
" <td>NaN</td>\n",
" <td>[ángel carrión tavárez, á carrión tavárez, ác ...</td>\n",
" <td>NaN</td>\n",
" <td>[editing and publishing, sociomusicology, geog...</td>\n",
" <td>[[loop profile, 687295]]</td>\n",
" <td>[[integration and economic and territorial dev...</td>\n",
" <td>[[director, university of puerto rico at río p...</td>\n",
" <td>132</td>\n",
" <td>ángel carrión-tavárez</td>\n",
" <td>2017-12-30t19:25:41.566z</td>\n",
" <td>2021-03-13t23:21:59.069z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[academia.edu, redalyc.org, directorioexit.inf...</td>\n",
" <td>NaN</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>susan</td>\n",
" <td>bastani</td>\n",
" <td>NaN</td>\n",
" <td>[s. bastani, سوسن باستانی]</td>\n",
" <td>sbastani@alzahra.ac.ir</td>\n",
" <td>[social networks, online and offline communiti...</td>\n",
" <td>[[scopus author id, 16642098400]]</td>\n",
" <td>[[sociology, ph.d., university of toronto, tor...</td>\n",
" <td>[[professor, alzahra university, tehran, vanak...</td>\n",
" <td>20</td>\n",
" <td>scopus - elsevier</td>\n",
" <td>2019-07-10t06:50:46.255z</td>\n",
" <td>2020-10-07t04:08:01.961z</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>0</td>\n",
" <td>alzahra.ac.ir</td>\n",
" <td>[gmail.com, gmail.com]</td>\n",
" <td>[scopus.com, google.com, publons.com, zenodo.o...</td>\n",
" <td>2.0</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[applied economics, financial economics, appli...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>caroline wanjiru kariuki</td>\n",
" <td>2020-03-18t10:18:04.007z</td>\n",
" <td>2021-02-11t14:40:38.515z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>NaN</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>NaN</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>myo kyaw hlaing</td>\n",
" <td>2018-12-26t12:51:57.801z</td>\n",
" <td>2021-01-26t14:36:47.421z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>141 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email given_names \\\n",
"0 1 1 1 nurul \n",
"1 1 1 1 carlos \n",
"2 1 1 1 nuria \n",
"3 1 1 1 mohd nazri \n",
"4 1 1 1 clara \n",
".. ... ... ... ... \n",
"136 1 1 1 jesús \n",
"137 1 1 1 ángel \n",
"138 1 1 1 susan \n",
"139 1 1 1 caroline wanjiru \n",
"140 1 1 1 myo \n",
"\n",
" family_name biography \\\n",
"0 malahayati google scholar \n",
"1 barrera im individual inventor, and this is my work; s... \n",
"2 hernández-león NaN \n",
"3 ismail born in penang, malaysia in 1971, dr. mohd had... \n",
"4 sarmento clara sarmento holds an aggregation in cultura... \n",
".. ... ... \n",
"136 portillo-fernández ba in philosophy, ba in humanities, ph.d. in p... \n",
"137 carrión-tavárez NaN \n",
"138 bastani NaN \n",
"139 kariuki caroline holds a phd in economics from curtin ... \n",
"140 kyaw hlaing NaN \n",
"\n",
" other_names \\\n",
"0 NaN \n",
"1 [retrodynamic, novelinflow] \n",
"2 [nuria h. león, nuria hernández león, hernánde... \n",
"3 [ndum (national defence university of malaysia)] \n",
"4 NaN \n",
".. ... \n",
"136 [jesús portillo fernández, portillo-fernández,... \n",
"137 [ángel carrión tavárez, á carrión tavárez, ác ... \n",
"138 [s. bastani, سوسن باستانی] \n",
"139 NaN \n",
"140 [dr myo kyaw hlaing] \n",
"\n",
" primary_email \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
".. ... \n",
"136 NaN \n",
"137 NaN \n",
"138 sbastani@alzahra.ac.ir \n",
"139 NaN \n",
"140 NaN \n",
"\n",
" keywords \\\n",
"0 NaN \n",
"1 [gearturbine, mechanical, power, innovation, t... \n",
"2 [marketing, research, human resources, busines... \n",
"3 [network communication, manet, wsn, network se... \n",
"4 [ethnography, tourism and business, anglo-amer... \n",
".. ... \n",
"136 [absurdo, lingüística, pragmática, filosofía d... \n",
"137 [editing and publishing, sociomusicology, geog... \n",
"138 [social networks, online and offline communiti... \n",
"139 [applied economics, financial economics, appli... \n",
"140 [economic geology] \n",
"\n",
" external_ids \\\n",
"0 [[researcherid, q-3861-2017]] \n",
"1 [[loop profile, 394457]] \n",
"2 NaN \n",
"3 [[scopus author id, 24372977800], [researcheri... \n",
"4 [[ciência id, d418-d6f8-7d49]] \n",
".. ... \n",
"136 [[scopus author id, 55229372800]] \n",
"137 [[loop profile, 687295]] \n",
"138 [[scopus author id, 16642098400]] \n",
"139 NaN \n",
"140 NaN \n",
"\n",
" education \\\n",
"0 [[civil and transportation engineering , maste... \n",
"1 NaN \n",
"2 [[, course: social skills, university of salam... \n",
"3 NaN \n",
"4 [[ao abrigo da bolsa santander ie best practic... \n",
".. ... \n",
"136 [[, doctor en filología, universidad de sevill... \n",
"137 [[integration and economic and territorial dev... \n",
"138 [[sociology, ph.d., university of toronto, tor... \n",
"139 [[economics, doctor of philosophy , curtin uni... \n",
"140 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"1 NaN 1 \n",
"2 [[merchandise reception and expedition trainer... 11 \n",
"3 [[lecturer, universiti pertahanan nasional mal... 35 \n",
"4 [[presidente da comissão de acreditação do nov... 275 \n",
".. ... ... \n",
"136 [[, grupo de investigación en lógica, lenguaje... 35 \n",
"137 [[director, university of puerto rico at río p... 132 \n",
"138 [[professor, alzahra university, tehran, vanak... 20 \n",
"139 [[director, educational development, strathmor... 4 \n",
"140 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"0 nurul malahayati 2017-10-01t00:46:31.324z \n",
"1 carlos barrera 2016-08-29t20:32:10.362z \n",
"2 nuria hernández-león 2015-11-28t07:18:58.442z \n",
"3 scopus - elsevier 2016-09-06t02:25:52.974z \n",
"4 clara sarmento 2013-12-12t00:33:58.190z \n",
".. ... ... \n",
"136 jesús portillo-fernández 2015-03-08t20:37:16.590z \n",
"137 ángel carrión-tavárez 2017-12-30t19:25:41.566z \n",
"138 scopus - elsevier 2019-07-10t06:50:46.255z \n",
"139 caroline wanjiru kariuki 2020-03-18t10:18:04.007z \n",
"140 myo kyaw hlaing 2018-12-26t12:51:57.801z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0 2019-08-19t15:52:47.253z 3 0 0 3 0 \n",
"1 2021-02-09t04:56:35.554z 0 0 0 0 0 \n",
"2 2021-03-05t16:37:47.403z 1 0 0 4 0 \n",
"3 2020-10-20t06:55:55.051z 24 0 0 35 0 \n",
"4 2020-10-12t14:43:00.749z 17 0 0 60 0 \n",
".. ... ... ... ... ... ... \n",
"136 2021-03-12t22:05:28.976z 0 0 0 0 0 \n",
"137 2021-03-13t23:21:59.069z 13 0 0 28 0 \n",
"138 2020-10-07t04:08:01.961z 19 0 0 33 0 \n",
"139 2021-02-11t14:40:38.515z 1 0 0 0 0 \n",
"140 2021-01-26t14:36:47.421z 1 0 0 2 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
".. ... ... \n",
"136 NaN NaN \n",
"137 NaN NaN \n",
"138 alzahra.ac.ir [gmail.com, gmail.com] \n",
"139 NaN NaN \n",
"140 NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"0 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n",
"1 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n",
"2 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n",
"3 [google.com.my, researchgate.net, academia.edu... NaN 16.0 \n",
"4 [iscap.pt, google.pt, academia.edu, researchga... NaN 13.0 \n",
".. ... ... ... \n",
"136 [us.es, us.es, us.es, google.es, microsoft.com... NaN 12.0 \n",
"137 [academia.edu, redalyc.org, directorioexit.inf... NaN 11.0 \n",
"138 [scopus.com, google.com, publons.com, zenodo.o... 2.0 11.0 \n",
"139 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n",
"140 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"0 1.0 NaN 2.0 1.0 \n",
"1 1.0 8.0 NaN NaN \n",
"2 NaN 7.0 19.0 16.0 \n",
"3 2.0 10.0 NaN 4.0 \n",
"4 1.0 6.0 8.0 37.0 \n",
".. ... ... ... ... \n",
"136 1.0 5.0 5.0 1.0 \n",
"137 1.0 6.0 4.0 3.0 \n",
"138 1.0 4.0 3.0 4.0 \n",
"139 NaN 4.0 3.0 6.0 \n",
"140 NaN 1.0 NaN 2.0 \n",
"\n",
"[141 rows x 30 columns]"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"exploded_sources = df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)].explode('works_source').reset_index(drop=True)\n",
"exploded_sources"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nurul</td>\n",
" <td>malahayati</td>\n",
" <td>google scholar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[researcherid, q-3861-2017]]</td>\n",
" <td>[[civil and transportation engineering , maste...</td>\n",
" <td>[[senior lecturer, universitas syiah kuala, ba...</td>\n",
" <td>6</td>\n",
" <td>nurul malahayati</td>\n",
" <td>2017-10-01t00:46:31.324z</td>\n",
" <td>2019-08-19t15:52:47.253z</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, ristekdikti.go.id, unsyiah.ac.id,...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>carlos</td>\n",
" <td>barrera</td>\n",
" <td>im individual inventor, and this is my work; s...</td>\n",
" <td>[retrodynamic, novelinflow]</td>\n",
" <td>NaN</td>\n",
" <td>[gearturbine, mechanical, power, innovation, t...</td>\n",
" <td>[[loop profile, 394457]]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>carlos barrera</td>\n",
" <td>2016-08-29t20:32:10.362z</td>\n",
" <td>2021-02-09t04:56:35.554z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[blogspot.mx, behance.net, authorstream.com, d...</td>\n",
" <td>NaN</td>\n",
" <td>24.0</td>\n",
" <td>1.0</td>\n",
" <td>8.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>nuria</td>\n",
" <td>hernández-león</td>\n",
" <td>NaN</td>\n",
" <td>[nuria h. león, nuria hernández león, hernánde...</td>\n",
" <td>NaN</td>\n",
" <td>[marketing, research, human resources, busines...</td>\n",
" <td>NaN</td>\n",
" <td>[[, course: social skills, university of salam...</td>\n",
" <td>[[merchandise reception and expedition trainer...</td>\n",
" <td>11</td>\n",
" <td>nuria hernández-león</td>\n",
" <td>2015-11-28t07:18:58.442z</td>\n",
" <td>2021-03-05t16:37:47.403z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[feriaempresamujer.com, escueladenegociosydire...</td>\n",
" <td>NaN</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" <td>7.0</td>\n",
" <td>19.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>clara</td>\n",
" <td>sarmento</td>\n",
" <td>clara sarmento holds an aggregation in cultura...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[ethnography, tourism and business, anglo-amer...</td>\n",
" <td>[[ciência id, d418-d6f8-7d49]]</td>\n",
" <td>[[ao abrigo da bolsa santander ie best practic...</td>\n",
" <td>[[presidente da comissão de acreditação do nov...</td>\n",
" <td>275</td>\n",
" <td>clara sarmento</td>\n",
" <td>2013-12-12t00:33:58.190z</td>\n",
" <td>2020-10-12t14:43:00.749z</td>\n",
" <td>17</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>60</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[iscap.pt, google.pt, academia.edu, researchga...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>8.0</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>michele</td>\n",
" <td>dantini</td>\n",
" <td>michele dantini (ph. d) is professor of histor...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[contemporary art, art theory, postcolonial, h...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[, università per stranieri di perugia, perug...</td>\n",
" <td>6</td>\n",
" <td>michele dantini</td>\n",
" <td>2014-02-22t17:01:43.444z</td>\n",
" <td>2019-11-25t20:21:04.714z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[unipmn.it, huffingtonpost.it, roars.it, doppi...</td>\n",
" <td>NaN</td>\n",
" <td>15.0</td>\n",
" <td>NaN</td>\n",
" <td>6.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>robert</td>\n",
" <td>ohara</td>\n",
" <td>systematics, evolutionary biology, and the his...</td>\n",
" <td>[r. ohara, r.j. ohara, robert ohara, robert...</td>\n",
" <td>NaN</td>\n",
" <td>[evolutionary biology, new england genealogy, ...</td>\n",
" <td>[[isni, 0000000138200102], [researcherid, b-47...</td>\n",
" <td>[[biology, ph.d., harvard university, cambridg...</td>\n",
" <td>NaN</td>\n",
" <td>45</td>\n",
" <td>robert j. ohara</td>\n",
" <td>2014-09-21t02:45:19.620z</td>\n",
" <td>2020-07-09t06:51:09.228z</td>\n",
" <td>23</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>72</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[rjohara.net, google.com, collegiateway.org, r...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>jesús</td>\n",
" <td>portillo-fernández</td>\n",
" <td>ba in philosophy, ba in humanities, ph.d. in p...</td>\n",
" <td>[jesús portillo fernández, portillo-fernández,...</td>\n",
" <td>NaN</td>\n",
" <td>[absurdo, lingüística, pragmática, filosofía d...</td>\n",
" <td>[[scopus author id, 55229372800]]</td>\n",
" <td>[[, doctor en filología, universidad de sevill...</td>\n",
" <td>[[, grupo de investigación en lógica, lenguaje...</td>\n",
" <td>35</td>\n",
" <td>jesús portillo-fernández</td>\n",
" <td>2015-03-08t20:37:16.590z</td>\n",
" <td>2021-03-12t22:05:28.976z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[us.es, us.es, us.es, google.es, microsoft.com...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" <td>5.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ángel</td>\n",
" <td>carrión-tavárez</td>\n",
" <td>NaN</td>\n",
" <td>[ángel carrión tavárez, á carrión tavárez, ác ...</td>\n",
" <td>NaN</td>\n",
" <td>[editing and publishing, sociomusicology, geog...</td>\n",
" <td>[[loop profile, 687295]]</td>\n",
" <td>[[integration and economic and territorial dev...</td>\n",
" <td>[[director, university of puerto rico at río p...</td>\n",
" <td>132</td>\n",
" <td>ángel carrión-tavárez</td>\n",
" <td>2017-12-30t19:25:41.566z</td>\n",
" <td>2021-03-13t23:21:59.069z</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>28</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[academia.edu, redalyc.org, directorioexit.inf...</td>\n",
" <td>NaN</td>\n",
" <td>11.0</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>caroline wanjiru</td>\n",
" <td>kariuki</td>\n",
" <td>caroline holds a phd in economics from curtin ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[applied economics, financial economics, appli...</td>\n",
" <td>NaN</td>\n",
" <td>[[economics, doctor of philosophy , curtin uni...</td>\n",
" <td>[[director, educational development, strathmor...</td>\n",
" <td>4</td>\n",
" <td>caroline wanjiru kariuki</td>\n",
" <td>2020-03-18t10:18:04.007z</td>\n",
" <td>2021-02-11t14:40:38.515z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[scopus.com, mendeley.com, publons.com, resear...</td>\n",
" <td>NaN</td>\n",
" <td>13.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>140</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>myo</td>\n",
" <td>kyaw hlaing</td>\n",
" <td>NaN</td>\n",
" <td>[dr myo kyaw hlaing]</td>\n",
" <td>NaN</td>\n",
" <td>[economic geology]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[lecturer, union of myanmar ministry of educa...</td>\n",
" <td>2</td>\n",
" <td>myo kyaw hlaing</td>\n",
" <td>2018-12-26t12:51:57.801z</td>\n",
" <td>2021-01-26t14:36:47.421z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[facebook.com, linkedin.com, instagram.com, re...</td>\n",
" <td>NaN</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>115 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email given_names \\\n",
"0 1 1 1 nurul \n",
"1 1 1 1 carlos \n",
"2 1 1 1 nuria \n",
"4 1 1 1 clara \n",
"5 1 1 1 michele \n",
".. ... ... ... ... \n",
"134 1 1 1 robert \n",
"136 1 1 1 jesús \n",
"137 1 1 1 ángel \n",
"139 1 1 1 caroline wanjiru \n",
"140 1 1 1 myo \n",
"\n",
" family_name biography \\\n",
"0 malahayati google scholar \n",
"1 barrera im individual inventor, and this is my work; s... \n",
"2 hernández-león NaN \n",
"4 sarmento clara sarmento holds an aggregation in cultura... \n",
"5 dantini michele dantini (ph. d) is professor of histor... \n",
".. ... ... \n",
"134 ohara systematics, evolutionary biology, and the his... \n",
"136 portillo-fernández ba in philosophy, ba in humanities, ph.d. in p... \n",
"137 carrión-tavárez NaN \n",
"139 kariuki caroline holds a phd in economics from curtin ... \n",
"140 kyaw hlaing NaN \n",
"\n",
" other_names primary_email \\\n",
"0 NaN NaN \n",
"1 [retrodynamic, novelinflow] NaN \n",
"2 [nuria h. león, nuria hernández león, hernánde... NaN \n",
"4 NaN NaN \n",
"5 NaN NaN \n",
".. ... ... \n",
"134 [r. ohara, r.j. ohara, robert ohara, robert... NaN \n",
"136 [jesús portillo fernández, portillo-fernández,... NaN \n",
"137 [ángel carrión tavárez, á carrión tavárez, ác ... NaN \n",
"139 NaN NaN \n",
"140 [dr myo kyaw hlaing] NaN \n",
"\n",
" keywords \\\n",
"0 NaN \n",
"1 [gearturbine, mechanical, power, innovation, t... \n",
"2 [marketing, research, human resources, busines... \n",
"4 [ethnography, tourism and business, anglo-amer... \n",
"5 [contemporary art, art theory, postcolonial, h... \n",
".. ... \n",
"134 [evolutionary biology, new england genealogy, ... \n",
"136 [absurdo, lingüística, pragmática, filosofía d... \n",
"137 [editing and publishing, sociomusicology, geog... \n",
"139 [applied economics, financial economics, appli... \n",
"140 [economic geology] \n",
"\n",
" external_ids \\\n",
"0 [[researcherid, q-3861-2017]] \n",
"1 [[loop profile, 394457]] \n",
"2 NaN \n",
"4 [[ciência id, d418-d6f8-7d49]] \n",
"5 NaN \n",
".. ... \n",
"134 [[isni, 0000000138200102], [researcherid, b-47... \n",
"136 [[scopus author id, 55229372800]] \n",
"137 [[loop profile, 687295]] \n",
"139 NaN \n",
"140 NaN \n",
"\n",
" education \\\n",
"0 [[civil and transportation engineering , maste... \n",
"1 NaN \n",
"2 [[, course: social skills, university of salam... \n",
"4 [[ao abrigo da bolsa santander ie best practic... \n",
"5 NaN \n",
".. ... \n",
"134 [[biology, ph.d., harvard university, cambridg... \n",
"136 [[, doctor en filología, universidad de sevill... \n",
"137 [[integration and economic and territorial dev... \n",
"139 [[economics, doctor of philosophy , curtin uni... \n",
"140 NaN \n",
"\n",
" employment n_works \\\n",
"0 [[senior lecturer, universitas syiah kuala, ba... 6 \n",
"1 NaN 1 \n",
"2 [[merchandise reception and expedition trainer... 11 \n",
"4 [[presidente da comissão de acreditação do nov... 275 \n",
"5 [[, università per stranieri di perugia, perug... 6 \n",
".. ... ... \n",
"134 NaN 45 \n",
"136 [[, grupo de investigación en lógica, lenguaje... 35 \n",
"137 [[director, university of puerto rico at río p... 132 \n",
"139 [[director, educational development, strathmor... 4 \n",
"140 [[lecturer, union of myanmar ministry of educa... 2 \n",
"\n",
" works_source activation_date \\\n",
"0 nurul malahayati 2017-10-01t00:46:31.324z \n",
"1 carlos barrera 2016-08-29t20:32:10.362z \n",
"2 nuria hernández-león 2015-11-28t07:18:58.442z \n",
"4 clara sarmento 2013-12-12t00:33:58.190z \n",
"5 michele dantini 2014-02-22t17:01:43.444z \n",
".. ... ... \n",
"134 robert j. ohara 2014-09-21t02:45:19.620z \n",
"136 jesús portillo-fernández 2015-03-08t20:37:16.590z \n",
"137 ángel carrión-tavárez 2017-12-30t19:25:41.566z \n",
"139 caroline wanjiru kariuki 2020-03-18t10:18:04.007z \n",
"140 myo kyaw hlaing 2018-12-26t12:51:57.801z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0 2019-08-19t15:52:47.253z 3 0 0 3 0 \n",
"1 2021-02-09t04:56:35.554z 0 0 0 0 0 \n",
"2 2021-03-05t16:37:47.403z 1 0 0 4 0 \n",
"4 2020-10-12t14:43:00.749z 17 0 0 60 0 \n",
"5 2019-11-25t20:21:04.714z 0 0 0 0 0 \n",
".. ... ... ... ... ... ... \n",
"134 2020-07-09t06:51:09.228z 23 0 0 72 0 \n",
"136 2021-03-12t22:05:28.976z 0 0 0 0 0 \n",
"137 2021-03-13t23:21:59.069z 13 0 0 28 0 \n",
"139 2021-02-11t14:40:38.515z 1 0 0 0 0 \n",
"140 2021-01-26t14:36:47.421z 1 0 0 2 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"4 NaN NaN \n",
"5 NaN NaN \n",
".. ... ... \n",
"134 NaN NaN \n",
"136 NaN NaN \n",
"137 NaN NaN \n",
"139 NaN NaN \n",
"140 NaN NaN \n",
"\n",
" url_domains n_emails n_urls \\\n",
"0 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n",
"1 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n",
"2 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n",
"4 [iscap.pt, google.pt, academia.edu, researchga... NaN 13.0 \n",
"5 [unipmn.it, huffingtonpost.it, roars.it, doppi... NaN 15.0 \n",
".. ... ... ... \n",
"134 [rjohara.net, google.com, collegiateway.org, r... NaN 12.0 \n",
"136 [us.es, us.es, us.es, google.es, microsoft.com... NaN 12.0 \n",
"137 [academia.edu, redalyc.org, directorioexit.inf... NaN 11.0 \n",
"139 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n",
"140 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n",
"\n",
" n_ids n_keywords n_education n_employment \n",
"0 1.0 NaN 2.0 1.0 \n",
"1 1.0 8.0 NaN NaN \n",
"2 NaN 7.0 19.0 16.0 \n",
"4 1.0 6.0 8.0 37.0 \n",
"5 NaN 6.0 NaN 1.0 \n",
".. ... ... ... ... \n",
"134 3.0 5.0 1.0 NaN \n",
"136 1.0 5.0 5.0 1.0 \n",
"137 1.0 6.0 4.0 3.0 \n",
"139 NaN 4.0 3.0 6.0 \n",
"140 NaN 1.0 NaN 2.0 \n",
"\n",
"[115 rows x 30 columns]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"exploded_sources[exploded_sources.apply(lambda x: x['works_source'].find(x['given_names']) >= 0, axis=1)]"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## Works source"
2021-03-18 17:43:00 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "markdown",
2021-03-22 19:08:20 +01:00
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"Paste from Miriam"
2021-03-22 19:08:20 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "markdown",
2021-03-18 17:43:00 +01:00
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## External IDs"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"External IDs should come from reliable sources. ORCID registrants cannot add them freely."
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 35,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"count 1.301959e+06\n",
"mean 1.358640e+00\n",
"std 6.635087e-01\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 8.000000e+01\n",
"Name: n_ids, dtype: float64"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"df.n_ids.describe()"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 36,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-9554-6633</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>john a</td>\n",
" <td>williams</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[scopus author id,  55553733518], [scopus aut...</td>\n",
" <td>NaN</td>\n",
" <td>[[, aston university, birmingham, , gb, 1722, ...</td>\n",
" <td>92</td>\n",
" <td>[aston research explorer]</td>\n",
" <td>2014-11-20t09:42:10.690z</td>\n",
" <td>2021-03-12t01:00:39.996z</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>208</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[aston.ac.uk]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>80.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-9554-6633 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"0000-0002-9554-6633 john a williams NaN NaN \n",
"\n",
" primary_email keywords \\\n",
"0000-0002-9554-6633 NaN NaN \n",
"\n",
" external_ids \\\n",
"0000-0002-9554-6633 [[scopus author id,  55553733518], [scopus aut... \n",
"\n",
" education \\\n",
"0000-0002-9554-6633 NaN \n",
"\n",
" employment \\\n",
"0000-0002-9554-6633 [[, aston university, birmingham, , gb, 1722, ... \n",
"\n",
" n_works works_source \\\n",
"0000-0002-9554-6633 92 [aston research explorer] \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0002-9554-6633 2014-11-20t09:42:10.690z 2021-03-12t01:00:39.996z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0002-9554-6633 80 0 0 208 0 \n",
"\n",
" primary_email_domain other_email_domains url_domains \\\n",
"0000-0002-9554-6633 NaN NaN [aston.ac.uk] \n",
"\n",
" n_emails n_urls n_ids n_keywords n_education \\\n",
"0000-0002-9554-6633 NaN 1.0 80.0 NaN NaN \n",
"\n",
" n_employment \n",
"0000-0002-9554-6633 1.0 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"df[df.n_ids == df.n_ids.max()]"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 37,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-25 15:20:06 +01:00
"ids = df[['orcid', 'external_ids']].explode('external_ids').reset_index(drop=True)"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 38,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-24 13:33:01 +01:00
"outputs": [],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"ids['provider'] = ids[ids.external_ids.notna()]['external_ids'].apply(lambda x: x[0])"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 39,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>external_ids</th>\n",
" <th>provider</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1</td>\n",
" <td>[researcherid, k-4630-2014]</td>\n",
" <td>researcherid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>1</td>\n",
" <td>[scopus author id, 54394231000]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>1</td>\n",
" <td>[researcherid, p-2223-2018]</td>\n",
" <td>researcherid</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>1</td>\n",
" <td>[scopus author id, 57189297461]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>1</td>\n",
" <td>[scopus author id, 8399842800]</td>\n",
" <td>scopus author id</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orcid external_ids provider\n",
"9 1 [researcherid, k-4630-2014] researcherid\n",
"29 1 [scopus author id, 54394231000] scopus author id\n",
"47 1 [researcherid, p-2223-2018] researcherid\n",
"51 1 [scopus author id, 57189297461] scopus author id\n",
"65 1 [scopus author id, 8399842800] scopus author id"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"ids[ids.provider.notna()].head()"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 40,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-24 13:33:01 +01:00
"outputs": [],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"top_ids_providers = ids.groupby('provider').count().sort_values('orcid', ascending=False)"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 41,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"scopus author id",
"researcherid",
"loop profile",
"ciência id",
"researcher name resolver id",
"中国科学家在线",
"sciprofile",
"isni",
"gnd",
"pitt id",
"technical university of denmark cwis",
"researcher id",
"id dialnet",
"digital author id",
"scopus author id: ",
"authenticusid",
"hku researcherpage",
"uow scholars",
"cti vitae",
"scopus author id:",
"hkust profile",
"chalmers id",
"scopus id",
"iauthor",
"google scholar",
"digital author id (dai)",
"authid",
"dai",
"us epa vivo",
"scopus id",
"authenticus",
"smithsonian profiles",
"github",
"escientist",
"vivo cornell",
"researcherid:",
"id dialnet:",
"dialnet id",
"sciprofiles",
"kaken",
"une researcher id",
"researcherid: ",
"orcid",
"scienceopen",
"profile system identifier",
"orcid id",
"custom"
],
"y": [
1030807,
544825,
117325,
36666,
7907,
4804,
4411,
3075,
2954,
2674,
2483,
1445,
1168,
1124,
1077,
869,
741,
646,
581,
548,
522,
430,
254,
212,
200,
177,
175,
155,
146,
127,
83,
61,
51,
49,
46,
39,
7,
6,
5,
5,
4,
3,
2,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "IDs provided by providers"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"d679749a-90a3-426f-904b-e0d9169dce75\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"d679749a-90a3-426f-904b-e0d9169dce75\")) { Plotly.newPlot( \"d679749a-90a3-426f-904b-e0d9169dce75\", [{\"type\": \"bar\", \"x\": [\"scopus author id\", \"researcherid\", \"loop profile\", \"ci\\u00eancia id\", \"researcher name resolver id\", \"\\u4e2d\\u56fd\\u79d1\\u5b66\\u5bb6\\u5728\\u7ebf\", \"sciprofile\", \"isni\", \"gnd\", \"pitt id\", \"technical university of denmark cwis\", \"researcher id\", \"id dialnet\", \"digital author id\", \"scopus author id: \", \"authenticusid\", \"hku researcherpage\", \"uow scholars\", \"cti vitae\", \"scopus author id:\", \"hkust profile\", \"chalmers id\", \"scopus id\", \"iauthor\", \"google scholar\", \"digital author id (dai)\", \"authid\", \"dai\", \"us epa vivo\", \"scopus id\", \"authenticus\", \"smithsonian profiles\", \"github\", \"escientist\", \"vivo cornell\", \"researcherid:\", \"id dialnet:\", \"dialnet id\", \"sciprofiles\", \"kaken\", \"une researcher id\", \"researcherid: \", \"orcid\", \"scienceopen\", \"profile system identifier\", \"orcid id\", \"custom\"], \"y\": [1030807, 544825, 117325, 36666, 7907, 4804, 4411, 3075, 2954, 2674, 2483, 1445, 1168, 1124, 1077, 869, 741, 646, 581, 548, 522, 430, 254, 212, 200, 177, 175, 155, 146, 127, 83, 61, 51, 49, 46, 39, 7, 6, 5, 5, 4, 3, 2, 1, 1, 1, 1]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ti
" \n",
"var gd = document.getElementById('d679749a-90a3-426f-904b-e0d9169dce75');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2021-03-18 17:43:00 +01:00
"source": [
"data = [\n",
" go.Bar(\n",
2021-03-25 15:20:06 +01:00
" x=top_ids_providers.index,\n",
" y=top_ids_providers['orcid']\n",
2021-03-18 17:43:00 +01:00
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
2021-03-25 15:20:06 +01:00
" title='IDs provided by providers',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
2021-03-18 17:43:00 +01:00
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 42,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'researcherid', 'scopus author id', 'loop profile', 'gnd',\n",
" 'ciência id', 'researcher name resolver id', 'pitt id',\n",
" 'id dialnet', 'isni', 'technical university of denmark cwis',\n",
" 'chalmers id', 'scopus author id: ', 'scopus author id:',\n",
" 'hkust profile', 'hku researcherpage', '中国科学家在线', 'uow scholars',\n",
" 'digital author id', 'sciprofile', 'digital author id (dai)',\n",
" 'cti vitae', 'researcher id', 'authid', 'authenticusid',\n",
" 'vivo cornell', 'us epa vivo', 'escientist',\n",
" 'smithsonian profiles', 'authenticus', 'github', 'iauthor',\n",
" 'orcid id', 'dai', 'scopus id', 'scopus id', 'google scholar',\n",
" 'researcherid:', 'kaken', 'dialnet id', 'researcherid: ',\n",
" 'une researcher id', 'sciprofiles', 'id dialnet:', 'scienceopen',\n",
" 'orcid', 'profile system identifier', 'custom'], dtype=object)"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"pd.unique(ids['provider'])"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## Keywords"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"This field is problematic, as users can be nasty and put multiple keywords into a single entry as opposed to entering them as separate keywords. Look at this:"
2021-03-23 19:03:37 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "code",
"execution_count": 43,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>n_keywords</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-0673-0341</th>\n",
" <td>1</td>\n",
" <td>154.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-3343-5660</th>\n",
" <td>1</td>\n",
" <td>148.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-7060-4112</th>\n",
" <td>1</td>\n",
" <td>140.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-6075-3501</th>\n",
" <td>1</td>\n",
" <td>140.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-5287-1949</th>\n",
" <td>1</td>\n",
" <td>132.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-1686-1935</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3800-6331</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8783-5814</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-7584-2283</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-0529-3538</th>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10916574 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid n_keywords\n",
"0000-0002-0673-0341 1 154.0\n",
"0000-0003-3343-5660 1 148.0\n",
"0000-0002-7060-4112 1 140.0\n",
"0000-0002-6075-3501 1 140.0\n",
"0000-0001-5287-1949 1 132.0\n",
"... ... ...\n",
"0000-0002-1686-1935 1 NaN\n",
"0000-0002-3800-6331 1 NaN\n",
"0000-0002-8783-5814 1 NaN\n",
"0000-0002-7584-2283 1 NaN\n",
"0000-0003-0529-3538 1 NaN\n",
"\n",
"[10916574 rows x 2 columns]"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"keywords_by_orcid = df[['orcid', 'n_keywords']].sort_values('n_keywords', ascending=False)\n",
"keywords_by_orcid"
2021-03-23 19:03:37 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "code",
"execution_count": 44,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
],
"y": [
154,
148,
140,
140,
132,
124,
115,
106,
105,
102,
100,
94,
92,
92,
88,
86,
78,
77,
75,
75,
72,
71,
70,
68,
68,
68,
67,
66,
64,
64,
63,
62,
61,
61,
61,
60,
60,
56,
55,
54,
53,
53,
53,
53,
53,
52,
52,
52,
51,
51,
51,
50,
50,
50,
50,
50,
49,
49,
49,
49,
48,
48,
48,
48,
48,
47,
47,
47,
47,
46,
46,
46,
45,
45,
45,
45,
44,
44,
44,
44,
44,
44,
44,
44,
44,
43,
43,
43,
43,
43,
43,
43,
43,
42,
42,
42,
42,
42,
42,
41
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Keywords provided by ORCiD"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"3eb740d3-2f6d-4433-97ee-e93a0d7fc61e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"3eb740d3-2f6d-4433-97ee-e93a0d7fc61e\")) { Plotly.newPlot( \"3eb740d3-2f6d-4433-97ee-e93a0d7fc61e\", [{\"type\": \"bar\", \"x\": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], \"y\": [154.0, 148.0, 140.0, 140.0, 132.0, 124.0, 115.0, 106.0, 105.0, 102.0, 100.0, 94.0, 92.0, 92.0, 88.0, 86.0, 78.0, 77.0, 75.0, 75.0, 72.0, 71.0, 70.0, 68.0, 68.0, 68.0, 67.0, 66.0, 64.0, 64.0, 63.0, 62.0, 61.0, 61.0, 61.0, 60.0, 60.0, 56.0, 55.0, 54.0, 53.0, 53.0, 53.0, 53.0, 53.0, 52.0, 52.0, 52.0, 51.0, 51.0, 51.0, 50.0, 50.0, 50.0, 50.0, 50.0, 49.0, 49.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 45.0, 45.0, 45.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 43.0, 42.0, 42.0, 42.0, 42.0, 42.0, 42.0, 41.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], 
\"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\
" \n",
"var gd = document.getElementById('3eb740d3-2f6d-4433-97ee-e93a0d7fc61e');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Bar chart of the TOP_N profiles declaring the most keywords.\n",
"# set_top_n is defined earlier in the notebook; presumably it refreshes the\n",
"# TOP_N / TOP_RANGE globals used below -- confirm against its definition.\n",
"set_top_n(100)\n",
"data = [\n",
"    go.Bar(\n",
"        # The ORCID iD is the DataFrame index. The 'orcid' column is 1 on\n",
"        # every row of the stored output, so using it as x stacked all the\n",
"        # bars on a single position instead of one bar per profile.\n",
"        x=keywords_by_orcid[:TOP_N].index,\n",
"        y=keywords_by_orcid[:TOP_N]['n_keywords']\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Keywords provided by ORCiD',\n",
"    # On a categorical axis the range is expressed in category positions.\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
2021-03-23 19:03:37 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "code",
"execution_count": 45,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# One row per (profile, keyword) pair, then count the rows of each keyword\n",
"# and order the keywords from the most to the least frequent.\n",
"top_keywords = (\n",
"    df[['orcid', 'keywords']]\n",
"    .explode('keywords')\n",
"    .reset_index(drop=True)\n",
"    .groupby('keywords')\n",
"    .count()\n",
"    .sort_values(by='orcid', ascending=False)\n",
")"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 46,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"machine learning",
"bioinformatics",
"education",
"molecular biology",
"cancer",
"ecology",
"artificial intelligence",
"epidemiology",
"public health",
"microbiology",
"neuroscience",
"immunology",
"genetics",
"climate change",
"remote sensing",
"biochemistry",
"genomics",
"biotechnology",
"nanotechnology",
"sustainability",
"gis",
"educación",
"deep learning",
"psychology",
"computer vision",
"marketing",
"nutrition",
"innovation",
"data science",
"statistics",
"data mining",
"image processing",
"nanomaterials",
"robotics",
"management",
"optimization",
"chemistry",
"renewable energy",
"gender",
"diabetes",
"biomaterials",
"educação",
"architecture",
"catalysis",
"history",
"electrochemistry",
"evolution",
"research",
"energy",
"biodiversity"
],
"y": [
8508,
5399,
5169,
4536,
4150,
3906,
3808,
3774,
3666,
3525,
3483,
3455,
3329,
3328,
3261,
2977,
2788,
2670,
2661,
2643,
2499,
2495,
2442,
2368,
2289,
2199,
2185,
2143,
2139,
2138,
2104,
2093,
2089,
2080,
2067,
2064,
2003,
2000,
1990,
1989,
1989,
1847,
1823,
1809,
1809,
1797,
1795,
1776,
1762,
1712
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top-50 keywords occurrence"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"c4eda312-8a34-4b05-8ae5-dfd3792c4d94\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"c4eda312-8a34-4b05-8ae5-dfd3792c4d94\")) { Plotly.newPlot( \"c4eda312-8a34-4b05-8ae5-dfd3792c4d94\", [{\"type\": \"bar\", \"x\": [\"machine learning\", \"bioinformatics\", \"education\", \"molecular biology\", \"cancer\", \"ecology\", \"artificial intelligence\", \"epidemiology\", \"public health\", \"microbiology\", \"neuroscience\", \"immunology\", \"genetics\", \"climate change\", \"remote sensing\", \"biochemistry\", \"genomics\", \"biotechnology\", \"nanotechnology\", \"sustainability\", \"gis\", \"educaci\\u00f3n\", \"deep learning\", \"psychology\", \"computer vision\", \"marketing\", \"nutrition\", \"innovation\", \"data science\", \"statistics\", \"data mining\", \"image processing\", \"nanomaterials\", \"robotics\", \"management\", \"optimization\", \"chemistry\", \"renewable energy\", \"gender\", \"diabetes\", \"biomaterials\", \"educa\\u00e7\\u00e3o\", \"architecture\", \"catalysis\", \"history\", \"electrochemistry\", \"evolution\", \"research\", \"energy\", \"biodiversity\"], \"y\": [8508, 5399, 5169, 4536, 4150, 3906, 3808, 3774, 3666, 3525, 3483, 3455, 3329, 3328, 3261, 2977, 2788, 2670, 2661, 2643, 2499, 2495, 2442, 2368, 2289, 2199, 2185, 2143, 2139, 2138, 2104, 2093, 2089, 2080, 2067, 2064, 2003, 2000, 1990, 1989, 1989, 1847, 1823, 1809, 1809, 1797, 1795, 1776, 1762, 1712]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": 
\"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [
" \n",
"var gd = document.getElementById('c4eda312-8a34-4b05-8ae5-dfd3792c4d94');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Bar chart of the TOP_N most frequent keywords: top_keywords is indexed by\n",
"# keyword and its 'orcid' column carries the occurrence count.\n",
"set_top_n(50)\n",
"\n",
"data = [go.Bar(x=top_keywords[:TOP_N].index, y=top_keywords[:TOP_N]['orcid'])]\n",
"layout = go.Layout(\n",
"    title='Top-%s keywords occurrence' % TOP_N,\n",
"    xaxis={'tickangle': 45, 'tickfont': {'size': 12}},\n",
")\n",
"\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
2021-03-23 19:03:37 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "markdown",
2021-03-24 13:33:01 +01:00
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## Education"
2021-03-24 13:33:01 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 47,
2021-03-24 13:33:01 +01:00
"metadata": {},
"outputs": [],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"def extract_education(lst):\n",
"    \"\"\"Flatten a profile's education entries into one string per entry.\n",
"\n",
"    Each entry is a sequence laid out as\n",
"    (degree, role, university, city, region, country, id, id_scheme);\n",
"    only the first three fields are kept, joined by single spaces.\n",
"\n",
"    Returns an empty list when the profile has no education, i.e. when\n",
"    `lst` is None or a float NaN (how pandas marks a missing cell).\n",
"    \"\"\"\n",
"    # A missing cell is not iterable: bail out instead of raising TypeError.\n",
"    if lst is None or isinstance(lst, float):\n",
"        return []\n",
"    return [' '.join([e[0], e[1], e[2]]) for e in lst]"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "code",
2021-03-24 13:33:01 +01:00
"execution_count": null,
2021-03-23 19:03:37 +01:00
"metadata": {},
"outputs": [],
2021-03-25 15:20:06 +01:00
"source": []
2021-03-23 19:03:37 +01:00
},
{
"cell_type": "code",
2021-03-24 13:33:01 +01:00
"execution_count": null,
2021-03-23 19:03:37 +01:00
"metadata": {},
"outputs": [],
2021-03-25 15:20:06 +01:00
"source": []
2021-03-23 19:03:37 +01:00
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "markdown",
2021-03-23 19:03:37 +01:00
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## Employment"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 48,
"metadata": {},
"outputs": [],
"source": [
2021-03-25 15:20:06 +01:00
"def extract_employment(lst):\n",
"    \"\"\"Flatten a profile's employment entries into one string per entry.\n",
"\n",
"    Each entry is a sequence laid out as\n",
"    (role, institute, city, region, country, id, id_scheme);\n",
"    only the role and the institute are kept, joined by a single space.\n",
"\n",
"    Returns an empty list when the profile has no employment, i.e. when\n",
"    `lst` is None or a float NaN (how pandas marks a missing cell).\n",
"    \"\"\"\n",
"    # A missing cell is not iterable: bail out instead of raising TypeError.\n",
"    if lst is None or isinstance(lst, float):\n",
"        return []\n",
"    return [' '.join([e[0], e[1]]) for e in lst]"
]
},
{
"cell_type": "code",
2021-03-24 13:33:01 +01:00
"execution_count": null,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-24 13:33:01 +01:00
"outputs": [],
2021-03-25 15:20:06 +01:00
"source": []
2021-03-23 09:47:47 +01:00
},
{
2021-03-23 19:03:37 +01:00
"cell_type": "code",
2021-03-24 13:33:01 +01:00
"execution_count": null,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-24 13:33:01 +01:00
"outputs": [],
2021-03-25 15:20:06 +01:00
"source": []
2021-03-23 09:47:47 +01:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## Biography"
2021-03-23 09:47:47 +01:00
]
},
{
2021-03-23 19:03:37 +01:00
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 49,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-24 13:33:01 +01:00
"outputs": [],
2021-03-23 09:47:47 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Treat empty biographies as missing. Rows dropped by the notna() filter\n",
"# come back as NaN when the result is aligned on the index at assignment.\n",
"# np.nan is spelled in lower case because the np.NaN alias was removed in\n",
"# NumPy 2.0; .loc replaces the chained df[mask]['biography'] lookup.\n",
"df['biography'] = df.loc[df['biography'].notna(), 'biography'].replace('', np.nan)"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 50,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"count 348649\n",
"unique 332523\n",
"top car title loans are a more straightforward way...\n",
"freq 343\n",
"Name: biography, dtype: object"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 09:47:47 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Summary of the biography column (count, unique, most frequent value).\n",
"df['biography'].describe()"
2021-03-23 19:03:37 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "code",
"execution_count": 51,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-7397-7977</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan upland]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-06t06:10:20.070z</td>\n",
" <td>2020-11-06t06:24:28.005z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-4931-9736</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan saratoga]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-13t01:04:19.859z</td>\n",
" <td>2020-11-13t01:15:12.546z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-8221-2303</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan victorville]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-05t00:38:21.096z</td>\n",
" <td>2020-11-05t00:40:40.091z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-6736-072X</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-08t05:38:30.786z</td>\n",
" <td>2020-12-08t05:40:03.786z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8727-1246</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[loan agency]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan online, car title loan north o...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-10t08:54:56.127z</td>\n",
" <td>2020-12-10t08:57:15.791z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>4.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-9640-8136</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan clovis]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-22t06:11:02.945z</td>\n",
" <td>2020-10-22t06:17:09.111z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-6926-3752</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan escondido]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-12-03t02:00:33.684z</td>\n",
" <td>2020-12-03t02:02:07.054z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3655-4713</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan san rafael]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-18t00:39:17.492z</td>\n",
" <td>2020-11-18t00:52:19.024z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8724-1020</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan san juan capistrano]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-11-19t00:31:54.080z</td>\n",
" <td>2020-11-19t00:34:08.721z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-4601-4569</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>premium car</td>\n",
" <td>title loans</td>\n",
" <td>car title loans are a more straightforward way...</td>\n",
" <td>[premium car title loans]</td>\n",
" <td>NaN</td>\n",
" <td>[car title loan mount pleasant]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-16t00:32:26.207z</td>\n",
" <td>2020-10-16t00:37:42.646z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[premiumcartitleloans.com]</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>421 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid verified_email verified_primary_email \\\n",
"0000-0002-7397-7977 1 1 1 \n",
"0000-0003-4931-9736 1 1 1 \n",
"0000-0001-8221-2303 1 1 1 \n",
"0000-0001-6736-072X 1 1 1 \n",
"0000-0002-8727-1246 1 1 1 \n",
"... ... ... ... \n",
"0000-0002-9640-8136 1 1 1 \n",
"0000-0002-6926-3752 1 1 1 \n",
"0000-0002-3655-4713 1 1 1 \n",
"0000-0002-8724-1020 1 1 1 \n",
"0000-0002-4601-4569 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0000-0002-7397-7977 premium car title loans \n",
"0000-0003-4931-9736 premium car title loans \n",
"0000-0001-8221-2303 premium car title loans \n",
"0000-0001-6736-072X premium car title loans \n",
"0000-0002-8727-1246 premium car title loans \n",
"... ... ... \n",
"0000-0002-9640-8136 premium car title loans \n",
"0000-0002-6926-3752 premium car title loans \n",
"0000-0002-3655-4713 premium car title loans \n",
"0000-0002-8724-1020 premium car title loans \n",
"0000-0002-4601-4569 premium car title loans \n",
"\n",
" biography \\\n",
"0000-0002-7397-7977 car title loans are a more straightforward way... \n",
"0000-0003-4931-9736 car title loans are a more straightforward way... \n",
"0000-0001-8221-2303 car title loans are a more straightforward way... \n",
"0000-0001-6736-072X car title loans are a more straightforward way... \n",
"0000-0002-8727-1246 car title loans are a more straightforward way... \n",
"... ... \n",
"0000-0002-9640-8136 car title loans are a more straightforward way... \n",
"0000-0002-6926-3752 car title loans are a more straightforward way... \n",
"0000-0002-3655-4713 car title loans are a more straightforward way... \n",
"0000-0002-8724-1020 car title loans are a more straightforward way... \n",
"0000-0002-4601-4569 car title loans are a more straightforward way... \n",
"\n",
" other_names primary_email \\\n",
"0000-0002-7397-7977 [premium car title loans] NaN \n",
"0000-0003-4931-9736 [premium car title loans] NaN \n",
"0000-0001-8221-2303 [premium car title loans] NaN \n",
"0000-0001-6736-072X NaN NaN \n",
"0000-0002-8727-1246 [loan agency] NaN \n",
"... ... ... \n",
"0000-0002-9640-8136 [premium car title loans] NaN \n",
"0000-0002-6926-3752 [premium car title loans] NaN \n",
"0000-0002-3655-4713 [premium car title loans] NaN \n",
"0000-0002-8724-1020 [premium car title loans] NaN \n",
"0000-0002-4601-4569 [premium car title loans] NaN \n",
"\n",
" keywords \\\n",
"0000-0002-7397-7977 [car title loan upland] \n",
"0000-0003-4931-9736 [car title loan saratoga] \n",
"0000-0001-8221-2303 [car title loan victorville] \n",
"0000-0001-6736-072X NaN \n",
"0000-0002-8727-1246 [car title loan online, car title loan north o... \n",
"... ... \n",
"0000-0002-9640-8136 [car title loan clovis] \n",
"0000-0002-6926-3752 [car title loan escondido] \n",
"0000-0002-3655-4713 [car title loan san rafael] \n",
"0000-0002-8724-1020 [car title loan san juan capistrano] \n",
"0000-0002-4601-4569 [car title loan mount pleasant] \n",
"\n",
" external_ids education employment n_works works_source \\\n",
"0000-0002-7397-7977 NaN NaN NaN 0 NaN \n",
"0000-0003-4931-9736 NaN NaN NaN 0 NaN \n",
"0000-0001-8221-2303 NaN NaN NaN 0 NaN \n",
"0000-0001-6736-072X NaN NaN NaN 0 NaN \n",
"0000-0002-8727-1246 NaN NaN NaN 0 NaN \n",
"... ... ... ... ... ... \n",
"0000-0002-9640-8136 NaN NaN NaN 0 NaN \n",
"0000-0002-6926-3752 NaN NaN NaN 0 NaN \n",
"0000-0002-3655-4713 NaN NaN NaN 0 NaN \n",
"0000-0002-8724-1020 NaN NaN NaN 0 NaN \n",
"0000-0002-4601-4569 NaN NaN NaN 0 NaN \n",
"\n",
" activation_date last_update_date \\\n",
"0000-0002-7397-7977 2020-11-06t06:10:20.070z 2020-11-06t06:24:28.005z \n",
"0000-0003-4931-9736 2020-11-13t01:04:19.859z 2020-11-13t01:15:12.546z \n",
"0000-0001-8221-2303 2020-11-05t00:38:21.096z 2020-11-05t00:40:40.091z \n",
"0000-0001-6736-072X 2020-12-08t05:38:30.786z 2020-12-08t05:40:03.786z \n",
"0000-0002-8727-1246 2020-12-10t08:54:56.127z 2020-12-10t08:57:15.791z \n",
"... ... ... \n",
"0000-0002-9640-8136 2020-10-22t06:11:02.945z 2020-10-22t06:17:09.111z \n",
"0000-0002-6926-3752 2020-12-03t02:00:33.684z 2020-12-03t02:02:07.054z \n",
"0000-0002-3655-4713 2020-11-18t00:39:17.492z 2020-11-18t00:52:19.024z \n",
"0000-0002-8724-1020 2020-11-19t00:31:54.080z 2020-11-19t00:34:08.721z \n",
"0000-0002-4601-4569 2020-10-16t00:32:26.207z 2020-10-16t00:37:42.646z \n",
"\n",
" n_doi n_arxiv n_pmc n_other_pids label \\\n",
"0000-0002-7397-7977 0 0 0 0 0 \n",
"0000-0003-4931-9736 0 0 0 0 0 \n",
"0000-0001-8221-2303 0 0 0 0 0 \n",
"0000-0001-6736-072X 0 0 0 0 0 \n",
"0000-0002-8727-1246 0 0 0 0 0 \n",
"... ... ... ... ... ... \n",
"0000-0002-9640-8136 0 0 0 0 0 \n",
"0000-0002-6926-3752 0 0 0 0 0 \n",
"0000-0002-3655-4713 0 0 0 0 0 \n",
"0000-0002-8724-1020 0 0 0 0 0 \n",
"0000-0002-4601-4569 0 0 0 0 0 \n",
"\n",
" primary_email_domain other_email_domains \\\n",
"0000-0002-7397-7977 NaN NaN \n",
"0000-0003-4931-9736 NaN NaN \n",
"0000-0001-8221-2303 NaN NaN \n",
"0000-0001-6736-072X NaN NaN \n",
"0000-0002-8727-1246 NaN NaN \n",
"... ... ... \n",
"0000-0002-9640-8136 NaN NaN \n",
"0000-0002-6926-3752 NaN NaN \n",
"0000-0002-3655-4713 NaN NaN \n",
"0000-0002-8724-1020 NaN NaN \n",
"0000-0002-4601-4569 NaN NaN \n",
"\n",
" url_domains n_emails n_urls n_ids \\\n",
"0000-0002-7397-7977 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0003-4931-9736 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0001-8221-2303 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0001-6736-072X [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0002-8727-1246 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"... ... ... ... ... \n",
"0000-0002-9640-8136 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0002-6926-3752 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0002-3655-4713 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0002-8724-1020 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"0000-0002-4601-4569 [premiumcartitleloans.com] NaN 1.0 NaN \n",
"\n",
" n_keywords n_education n_employment \n",
"0000-0002-7397-7977 1.0 NaN NaN \n",
"0000-0003-4931-9736 1.0 NaN NaN \n",
"0000-0001-8221-2303 1.0 NaN NaN \n",
"0000-0001-6736-072X NaN NaN NaN \n",
"0000-0002-8727-1246 4.0 NaN NaN \n",
"... ... ... ... \n",
"0000-0002-9640-8136 1.0 NaN NaN \n",
"0000-0002-6926-3752 1.0 NaN NaN \n",
"0000-0002-3655-4713 1.0 NaN NaN \n",
"0000-0002-8724-1020 1.0 NaN NaN \n",
"0000-0002-4601-4569 1.0 NaN NaN \n",
"\n",
"[421 rows x 30 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 19:03:37 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Profiles whose biography contains the most frequent biography text.\n",
"# regex=False matches the phrase literally; na=False makes missing biographies count as no match.\n",
"df[df['biography'].str.contains('car title loans are a more straightforward', na=False, regex=False)]"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 52,
2021-03-23 19:03:37 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-25 15:20:06 +01:00
"def score(bio):\n",
"    \"\"\"Score a biography with the antispam filter.\n",
"\n",
"    Returns whatever ``antispam.score(bio)`` returns, or the sentinel ``-1``\n",
"    when the filter raises (seen for very short or punctuation-only texts),\n",
"    so that unscorable rows can be found afterwards via ``spam_score == -1``.\n",
"    \"\"\"\n",
"    try:\n",
"        return antispam.score(bio)\n",
"    except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must still stop a long .apply()\n",
"        return -1"
2021-03-23 19:03:37 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 53,
2021-03-23 19:03:37 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-25 15:20:06 +01:00
"# Score every non-missing biography; rows without one receive NaN through index alignment.\n",
"df['spam_score'] = df['biography'].dropna().apply(score)"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 54,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>biography</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0003-0505-2734</th>\n",
" <td>1</td>\n",
" <td>j</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-7686-1032</th>\n",
" <td>1</td>\n",
" <td>hi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-3417-7299</th>\n",
" <td>1</td>\n",
" <td>.....</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-3794-1288</th>\n",
" <td>1</td>\n",
" <td>m.d., ph.d.</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0001-9655-4806</th>\n",
" <td>1</td>\n",
" <td>肿瘤</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-3823-2678</th>\n",
" <td>1</td>\n",
" <td>b.e, m.e. ph.d</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-4041-0840</th>\n",
" <td>1</td>\n",
" <td>/</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-4285-8537</th>\n",
" <td>1</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-1545-8773</th>\n",
" <td>1</td>\n",
" <td>hi</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-6302-4224</th>\n",
" <td>1</td>\n",
" <td>.</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>343 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid biography\n",
"0000-0003-0505-2734 1 j\n",
"0000-0001-7686-1032 1 hi\n",
"0000-0002-3417-7299 1 .....\n",
"0000-0003-3794-1288 1 m.d., ph.d.\n",
"0000-0001-9655-4806 1 肿瘤\n",
"... ... ...\n",
"0000-0003-3823-2678 1 b.e, m.e. ph.d\n",
"0000-0003-4041-0840 1 /\n",
"0000-0002-4285-8537 1 \n",
"0000-0002-1545-8773 1 hi\n",
"0000-0002-6302-4224 1 .\n",
"\n",
"[343 rows x 2 columns]"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 09:35:35 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Biographies the filter could not score (sentinel -1 returned by score()).\n",
"df.loc[df['spam_score'] == -1, ['orcid', 'biography']]"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 55,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-25 15:20:06 +01:00
"# Convert the -1 sentinel into a missing value so describe() and threshold filters ignore it.\n",
"# Uses np.nan: the np.NaN alias was removed in NumPy 2.0.\n",
"df['spam_score'] = df['spam_score'].replace(-1, np.nan)"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 56,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/plain": [
"count 3.483060e+05\n",
"mean 6.117792e-01\n",
"std 4.472392e-01\n",
"min 1.917500e-22\n",
"25% 1.969077e-02\n",
"50% 9.563239e-01\n",
"75% 9.999993e-01\n",
"max 1.000000e+00\n",
"Name: spam_score, dtype: float64"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 09:47:47 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Distribution of the spam scores.\n",
"df['spam_score'].describe()"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 57,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>biography</th>\n",
" <th>spam_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0000-0002-2638-4108</th>\n",
" <td>investigador de la universidad de oviedo. depa...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-2862-6139</th>\n",
" <td>formación académica en la temática de manejo d...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-8873-189X</th>\n",
" <td>doctor en educación, maestro en gerencia de la...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-1291-3782</th>\n",
" <td>possui graduação em psicologia pela pontifícia...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-2263-6646</th>\n",
" <td>roofing contractors in seattle waroofing contr...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-2606-3849</th>\n",
" <td>jose ignacio peláez sánchez ha sido profesor e...</td>\n",
" <td>0.999966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-0459-4822</th>\n",
" <td>mestranda em tecnologia na saúde e foi aluna o...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0003-0057-1551</th>\n",
" <td>the phd degree of pharmacy was received under ...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-7878-164X</th>\n",
" <td>mostafa metwaly is an assistant lecturer at th...</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0000-0002-6633-0673</th>\n",
" <td>jual obat aborsi di tangerang, obat penggugur ...</td>\n",
" <td>0.999999</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>119552 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" biography \\\n",
"0000-0002-2638-4108 investigador de la universidad de oviedo. depa... \n",
"0000-0003-2862-6139 formación académica en la temática de manejo d... \n",
"0000-0002-8873-189X doctor en educación, maestro en gerencia de la... \n",
"0000-0003-1291-3782 possui graduação em psicologia pela pontifícia... \n",
"0000-0002-2263-6646 roofing contractors in seattle waroofing contr... \n",
"... ... \n",
"0000-0002-2606-3849 jose ignacio peláez sánchez ha sido profesor e... \n",
"0000-0003-0459-4822 mestranda em tecnologia na saúde e foi aluna o... \n",
"0000-0003-0057-1551 the phd degree of pharmacy was received under ... \n",
"0000-0002-7878-164X mostafa metwaly is an assistant lecturer at th... \n",
"0000-0002-6633-0673 jual obat aborsi di tangerang, obat penggugur ... \n",
"\n",
" spam_score \n",
"0000-0002-2638-4108 1.000000 \n",
"0000-0003-2862-6139 1.000000 \n",
"0000-0002-8873-189X 1.000000 \n",
"0000-0003-1291-3782 1.000000 \n",
"0000-0002-2263-6646 1.000000 \n",
"... ... \n",
"0000-0002-2606-3849 0.999966 \n",
"0000-0003-0459-4822 1.000000 \n",
"0000-0003-0057-1551 1.000000 \n",
"0000-0002-7878-164X 1.000000 \n",
"0000-0002-6633-0673 0.999999 \n",
"\n",
"[119552 rows x 2 columns]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 12:13:04 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# Biographies with a spam score above 0.9999.\n",
"df.loc[df['spam_score'] > 0.9999, ['biography', 'spam_score']]"
2021-03-23 12:13:04 +01:00
]
},
{
2021-03-25 15:20:06 +01:00
"cell_type": "markdown",
2021-03-23 09:47:47 +01:00
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## All-vs-all correlation"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 58,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"orcid",
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"spam_score"
],
"xaxis": "x",
"y": [
"orcid",
"verified_email",
"verified_primary_email",
"n_works",
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
"n_emails",
"n_urls",
"n_ids",
"n_keywords",
"n_education",
"n_employment",
"spam_score"
],
"yaxis": "y",
"z": [
[
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
[
null,
1,
0.965027512571463,
0.07909450389448938,
0.07267250617006468,
0.006476297655163804,
0.030600690078714833,
0.06072651274610396,
null,
0.01724525375901219,
0.011734321412087068,
0.08789094997479488,
0.04349305792976212,
0.06294096375432985,
0.03249170979418152,
-0.0009676223108538206
],
[
null,
0.965027512571463,
1,
0.0819319323153257,
0.07525273852111228,
0.006701227684123712,
0.031695842542234315,
0.06287595850156351,
null,
0.012922886805318948,
0.012387715663549998,
0.08913447886422585,
0.04319428069032002,
0.06287478936154348,
0.03218830236670472,
-0.0012562388439236645
],
[
null,
0.07909450389448938,
0.0819319323153257,
1,
0.9378826746732684,
0.3126908705369688,
0.3510119929598013,
0.8350663052170557,
null,
0.04718988564738906,
0.05653910865456996,
0.24172815699207165,
0.10039832975281514,
0.07841166999115001,
0.13046589790007565,
0.03185515400361228
],
[
null,
0.07267250617006468,
0.07525273852111228,
0.9378826746732684,
1,
0.35657234546387656,
0.3621917393246728,
0.8012635261223445,
null,
0.043499007216112696,
0.03647205658234223,
0.2269939652523629,
0.08797332017106713,
0.059729319529628046,
0.10807896768803292,
0.022572749381159763
],
[
null,
0.006476297655163804,
0.006701227684123712,
0.3126908705369688,
0.35657234546387656,
1,
0.0009026428265918365,
0.24215761656047952,
null,
-0.0012865085810765875,
-0.001329117426167316,
0.0055175608369640175,
0.005343734662423831,
0.002858131608668,
0.012070494908066045,
-0.004264541425264081
],
[
null,
0.030600690078714833,
0.031695842542234315,
0.3510119929598013,
0.3621917393246728,
0.0009026428265918365,
1,
0.2568594049240261,
null,
0.007094547284091986,
0.009274871422764654,
0.06890564721203653,
0.04384027185991069,
0.044227134178966364,
0.06513883102687293,
0.04384493133686868
],
[
null,
0.06072651274610396,
0.06287595850156351,
0.8350663052170557,
0.8012635261223445,
0.24215761656047952,
0.2568594049240261,
1,
null,
0.03217450452465033,
0.026308875350291965,
0.23592717383228326,
0.07883129209802732,
0.05208032423018972,
0.09185353246013575,
0.026633147020694893
],
[
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null,
null
],
[
null,
0.01724525375901219,
0.012922886805318948,
0.04718988564738906,
0.043499007216112696,
-0.0012865085810765875,
0.007094547284091986,
0.03217450452465033,
null,
1,
0.10958644107635762,
0.04522619986981265,
0.057271130821122146,
0.042912264959997656,
0.06989712580810882,
-0.003962136064906193
],
[
null,
0.011734321412087068,
0.012387715663549998,
0.05653910865456996,
0.03647205658234223,
-0.001329117426167316,
0.009274871422764654,
0.026308875350291965,
null,
0.10958644107635762,
1,
0.06946298201611982,
0.14850020945342837,
0.09587783320820187,
0.10097489869640557,
0.059397185555557654
],
[
null,
0.08789094997479488,
0.08913447886422585,
0.24172815699207165,
0.2269939652523629,
0.0055175608369640175,
0.06890564721203653,
0.23592717383228326,
null,
0.04522619986981265,
0.06946298201611982,
1,
0.0821637191798123,
0.06185375286572581,
0.10400677096543276,
0.035508933757786715
],
[
null,
0.04349305792976212,
0.04319428069032002,
0.10039832975281514,
0.08797332017106713,
0.005343734662423831,
0.04384027185991069,
0.07883129209802732,
null,
0.057271130821122146,
0.14850020945342837,
0.0821637191798123,
1,
0.13378013997427662,
0.15480312032926746,
0.04131338504782112
],
[
null,
0.06294096375432985,
0.06287478936154348,
0.07841166999115001,
0.059729319529628046,
0.002858131608668,
0.044227134178966364,
0.05208032423018972,
null,
0.042912264959997656,
0.09587783320820187,
0.06185375286572581,
0.13378013997427662,
1,
0.3541453788931816,
0.07223966422815224
],
[
null,
0.03249170979418152,
0.03218830236670472,
0.13046589790007565,
0.10807896768803292,
0.012070494908066045,
0.06513883102687293,
0.09185353246013575,
null,
0.06989712580810882,
0.10097489869640557,
0.10400677096543276,
0.15480312032926746,
0.3541453788931816,
1,
0.036360329635112675
],
[
null,
-0.0009676223108538206,
-0.0012562388439236645,
0.03185515400361228,
0.022572749381159763,
-0.004264541425264081,
0.04384493133686868,
0.026633147020694893,
null,
-0.003962136064906193,
0.059397185555557654,
0.035508933757786715,
0.04131338504782112,
0.07223966422815224,
0.036360329635112675,
1
]
]
}
],
"layout": {
"coloraxis": {
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
},
"text/html": [
"<div> <div id=\"454c5049-0a2f-47b4-b3f1-190a4446a91b\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"454c5049-0a2f-47b4-b3f1-190a4446a91b\")) { Plotly.newPlot( \"454c5049-0a2f-47b4-b3f1-190a4446a91b\", [{\"coloraxis\": \"coloraxis\", \"hovertemplate\": \"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\", \"name\": \"0\", \"type\": \"heatmap\", \"x\": [\"orcid\", \"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"spam_score\"], \"xaxis\": \"x\", \"y\": [\"orcid\", \"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\", \"n_education\", \"n_employment\", \"spam_score\"], \"yaxis\": \"y\", \"z\": [[null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [null, 1.0, 0.965027512571463, 0.07909450389448938, 0.07267250617006468, 0.006476297655163804, 0.030600690078714833, 0.06072651274610396, null, 0.01724525375901219, 0.011734321412087068, 0.08789094997479488, 0.04349305792976212, 0.06294096375432985, 0.03249170979418152, -0.0009676223108538206], [null, 0.965027512571463, 1.0, 0.0819319323153257, 0.07525273852111228, 0.006701227684123712, 0.031695842542234315, 0.06287595850156351, null, 0.012922886805318948, 0.012387715663549998, 0.08913447886422585, 0.04319428069032002, 0.06287478936154348, 0.03218830236670472, -0.0012562388439236645], [null, 0.07909450389448938, 0.0819319323153257, 1.0, 0.9378826746732684, 0.3126908705369688, 0.3510119929598013, 0.8350663052170557, null, 0.04718988564738906, 0.05653910865456996, 0.24172815699207165, 0.10039832975281514, 0.07841166999115001, 
0.13046589790007565, 0.03185515400361228], [null, 0.07267250617006468, 0.07525273852111228, 0.9378826746732684, 1.0, 0.35657234546387656, 0.3621917393246728, 0.8012635261223445, null, 0.043499007216112696, 0.03647205658234223, 0.2269939652523629, 0.08797332017106713, 0.059729319529628046, 0.10807896768803292, 0.022572749381159763], [null, 0.006476297655163804, 0.006701227684123712, 0.3126908705369688, 0.35657234546387656, 1.0, 0.0009026428265918365, 0.24215761656047952, null, -0.0012865085810765875, -0.001329117426167316, 0.0055175608369640175, 0.005343734662423831, 0.002858131608668, 0.012070494908066045, -0.004264541425264081], [null, 0.030600690078714833, 0.031695842542234315, 0.3510119929598013, 0.3621917393246728, 0.0009026428265918365, 1.0, 0.2568594049240261, null, 0.007094547284091986, 0.009274871422764654, 0.06890564721203653, 0.04384027185991069, 0.044227134178966364, 0.06513883102687293, 0.04384493133686868], [null, 0.06072651274610396, 0.06287595850156351, 0.8350663052170557, 0.8012635261223445, 0.24215761656047952, 0.2568594049240261, 1.0, null, 0.03217450452465033, 0.026308875350291965, 0.23592717383228326, 0.07883129209802732, 0.05208032423018972, 0.09185353246013575, 0.026633147020694893], [null, null, null, null, null, null, null, null, null, null, null, null, null, null, null, null], [null, 0.01724525375901219, 0.012922886805318948, 0.04718988564738906, 0.043499007216112696, -0.0012865085810765875, 0.007094547284091986, 0.03217450452465033, null, 1.0, 0.10958644107635762, 0.04522619986981265, 0.057271130821122146, 0.042912264959997656, 0.06989712580810882, -0.003962136064906193], [null, 0.011734321412087068, 0.012387715663549998, 0.05653910865456996, 0.03647205658234223, -0.001329117426167316, 0.009274871422764654, 0.026308875350291965, null, 0.10958644107635762, 1.0, 0.06946298201611982, 0.14850020945342837, 0.09587783320820187, 0.10097489869640557, 0.059397185555557654]
" \n",
"var gd = document.getElementById('454c5049-0a2f-47b4-b3f1-190a4446a91b');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2021-03-23 12:13:04 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"fig = px.imshow(df.corr())\n",
"fig.show()"
2021-03-23 09:35:35 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-25 15:20:06 +01:00
"## Label speculation"
2021-03-23 09:35:35 +01:00
]
},
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 59,
2021-03-24 13:33:01 +01:00
"metadata": {},
2021-03-25 15:20:06 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>primary_email</th>\n",
" <th>keywords</th>\n",
" <th>external_ids</th>\n",
" <th>education</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>url_domains</th>\n",
" <th>n_emails</th>\n",
" <th>n_urls</th>\n",
" <th>n_ids</th>\n",
" <th>n_keywords</th>\n",
" <th>n_education</th>\n",
" <th>n_employment</th>\n",
" <th>spam_score</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment, spam_score]\n",
"Index: []"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-24 13:33:01 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"df[df.label == 1]"
2021-03-24 13:33:01 +01:00
]
},
2021-03-23 19:03:37 +01:00
{
"cell_type": "code",
2021-03-25 15:20:06 +01:00
"execution_count": 60,
2021-03-23 19:03:37 +01:00
"metadata": {},
"outputs": [],
2021-03-24 13:33:01 +01:00
"source": [
2021-03-25 15:20:06 +01:00
"# (df.n_works > 0) & (df.n_ids > 1)"
2021-03-24 13:33:01 +01:00
]
2021-03-23 19:03:37 +01:00
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "code",
2021-03-22 19:08:20 +01:00
"execution_count": null,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
2021-03-22 19:08:20 +01:00
"source": []
2021-03-18 17:43:00 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}