fake-orcid-analysis/notebooks/01-Exploration.ipynb

14013 lines
477 KiB
Plaintext
Raw Normal View History

2021-03-18 17:43:00 +01:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-19 12:19:45 +01:00
"# Exploratory analysis"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TODO:\n",
"- Understanding the reason for fake profiles can bring insight on how to catch them (could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n",
"- Make casistics (e.g. author publishing with empty orcid, author publishing but not on OpenAIRE, etc.)\n",
"- Temporal dimension of any use?\n",
"- Can we access private info thanks to the OpenAIRE-ORCID agreement?\n"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 1,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" requirejs.config({\n",
" paths: {\n",
" 'plotly': ['https://cdn.plot.ly/plotly-latest.min']\n",
" }\n",
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import ast\n",
"import tldextract\n",
"import numpy\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
2021-03-22 19:08:20 +01:00
"import plotly.express as px\n",
2021-03-18 17:43:00 +01:00
"\n",
"init_notebook_mode(connected=True)\n",
2021-03-23 09:35:35 +01:00
"TOP_N = 0\n",
"TOP_RANGE = [0, 0]\n",
"def set_top_n(n):\n",
" global TOP_N, TOP_RANGE\n",
" TOP_N = n\n",
" TOP_RANGE = [-.5, n - 1 + .5]"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Notable solid ORCID iDs for explorative purposes:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"AM = '0000-0002-5193-7851'\n",
"PP = '0000-0002-8588-4196'\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"Notable anomalies:"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 3,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
"JOURNAL = '0000-0003-1815-5732'\n",
2021-03-23 12:13:04 +01:00
"NOINFO = '0000-0001-5009-2052'\n",
"VALID_NO_OA = '0000-0002-5154-6404' # True profile, but not in OpenAIRE\n",
2021-03-22 19:08:20 +01:00
"# todo: find group-shared ORCiD, if possible"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"Notable fake ORCID iDs:"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"SCAFFOLD = '0000-0001-5004-7761'\n",
"WHATSAPP = '0000-0001-6997-9470'\n",
"PENIS = '0000-0002-3399-7287'\n",
"BITCOIN = '0000-0002-7518-6845'\n",
"FITNESS_CHINA = '0000-0002-1234-835X' # URL record + employment\n",
"CANNABIS = '0000-0002-9025-8632' # URL > 70 + works (REMOVED)\n",
"PLUMBER = '0000-0002-1700-8311' # URL > 10 + works "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load the dataset"
]
},
{
"cell_type": "code",
2021-03-22 19:08:20 +01:00
"execution_count": 6,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-18 17:43:00 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 19:03:37 +01:00
" <th>...</th>\n",
2021-03-18 17:43:00 +01:00
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>0</th>\n",
" <td>0000-0001-5009-2052</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2019-06-05t20:25:43.066z</td>\n",
" <td>2019-12-11t03:57:41.741z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>1</th>\n",
" <td>0000-0001-5943-0732</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2015-08-18t13:10:42.871z</td>\n",
" <td>2016-06-15t01:05:19.986z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>2</th>\n",
" <td>0000-0001-6083-622x</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2019-01-21t10:55:27.997z</td>\n",
" <td>2019-01-28t16:24:02.199z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>3</th>\n",
" <td>0000-0001-6262-5709</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2015-08-18t14:29:39.440z</td>\n",
" <td>2017-06-21t07:18:20.787z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>4</th>\n",
" <td>0000-0001-6616-4890</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2015-08-13t01:59:51.802z</td>\n",
" <td>2016-06-15t01:05:21.373z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>5 rows × 24 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid claimed verified_email verified_primary_email \\\n",
"0 0000-0001-5009-2052 1 1 1 \n",
"1 0000-0001-5943-0732 1 1 1 \n",
"2 0000-0001-6083-622x 1 1 1 \n",
"3 0000-0001-6262-5709 1 1 1 \n",
"4 0000-0001-6616-4890 1 1 1 \n",
"\n",
" given_names family_name biography other_names urls primary_email ... \\\n",
"0 NaN NaN NaN NaN NaN NaN ... \n",
"1 NaN NaN NaN NaN NaN NaN ... \n",
"2 NaN NaN NaN NaN NaN NaN ... \n",
"3 NaN NaN NaN NaN NaN NaN ... \n",
"4 NaN NaN NaN NaN NaN NaN ... \n",
"\n",
" employment n_works works_source activation_date \\\n",
"0 NaN 0 NaN 2019-06-05t20:25:43.066z \n",
"1 NaN 0 NaN 2015-08-18t13:10:42.871z \n",
"2 NaN 0 NaN 2019-01-21t10:55:27.997z \n",
"3 NaN 0 NaN 2015-08-18t14:29:39.440z \n",
"4 NaN 0 NaN 2015-08-13t01:59:51.802z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \n",
"0 2019-12-11t03:57:41.741z 0 0 0 0 0 \n",
"1 2016-06-15t01:05:19.986z 0 0 0 0 0 \n",
"2 2019-01-28t16:24:02.199z 0 0 0 0 0 \n",
"3 2017-06-21t07:18:20.787z 0 0 0 0 0 \n",
"4 2016-06-15t01:05:21.373z 0 0 0 0 0 \n",
"\n",
"[5 rows x 24 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-22 19:08:20 +01:00
"execution_count": 6,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df = pd.read_pickle('../data/processed/dataset.pkl')\n",
"df.head(5)"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 12:13:04 +01:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"Notable profiles inspection"
2021-03-23 12:13:04 +01:00
]
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 7,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 19:03:37 +01:00
" <th>...</th>\n",
2021-03-18 17:43:00 +01:00
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>1575869</th>\n",
" <td>0000-0002-5193-7851</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>1</td>\n",
" <td>andrea</td>\n",
" <td>mannocci</td>\n",
" <td>data scientist &amp; researcher; scholarly knowled...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[personal website, https://andremann.github.i...</td>\n",
" <td>andrea.mannocci@isti.cnr.it</td>\n",
" <td>...</td>\n",
" <td>[[research associate, istituto di scienza e te...</td>\n",
" <td>37</td>\n",
" <td>[scopus - elsevier, crossref metadata search, ...</td>\n",
" <td>2017-09-12t14:28:33.467z</td>\n",
" <td>2021-03-09t08:32:47.840z</td>\n",
" <td>34</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>60</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>1 rows × 24 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid claimed verified_email verified_primary_email \\\n",
"1575869 0000-0002-5193-7851 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"1575869 andrea mannocci \n",
"\n",
" biography other_names \\\n",
"1575869 data scientist & researcher; scholarly knowled... NaN \n",
"\n",
" urls \\\n",
"1575869 [[personal website, https://andremann.github.i... \n",
"\n",
" primary_email ... \\\n",
"1575869 andrea.mannocci@isti.cnr.it ... \n",
"\n",
" employment n_works \\\n",
"1575869 [[research associate, istituto di scienza e te... 37 \n",
"\n",
" works_source \\\n",
"1575869 [scopus - elsevier, crossref metadata search, ... \n",
"\n",
" activation_date last_update_date n_doi n_arxiv \\\n",
"1575869 2017-09-12t14:28:33.467z 2021-03-09t08:32:47.840z 34 0 \n",
"\n",
" n_pmc n_other_pids label \n",
"1575869 0 60 1 \n",
"\n",
"[1 rows x 24 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 7,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df[df['orcid'] == AM]"
2021-03-23 12:13:04 +01:00
]
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 8,
2021-03-23 12:13:04 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
2021-03-23 19:03:37 +01:00
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>employment</th>\n",
" <th>n_works</th>\n",
" <th>works_source</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-23 12:13:04 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>6819986</th>\n",
" <td>0000-0001-6997-9470</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>other</td>\n",
" <td>whatsapp</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[otherwhatsapp, https://otherwhatsapp.com/], ...</td>\n",
" <td>NaN</td>\n",
2021-03-23 12:13:04 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-10-07t10:37:12.237z</td>\n",
" <td>2020-10-08t02:32:03.935z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 12:13:04 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>1 rows × 24 columns</p>\n",
2021-03-23 12:13:04 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid claimed verified_email verified_primary_email \\\n",
"6819986 0000-0001-6997-9470 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"6819986 other whatsapp NaN NaN \n",
"\n",
" urls primary_email ... \\\n",
"6819986 [[otherwhatsapp, https://otherwhatsapp.com/], ... NaN ... \n",
"\n",
" employment n_works works_source activation_date \\\n",
"6819986 NaN 0 NaN 2020-10-07t10:37:12.237z \n",
"\n",
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \n",
"6819986 2020-10-08t02:32:03.935z 0 0 0 0 0 \n",
"\n",
"[1 rows x 24 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['orcid'] == WHATSAPP]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"orcid 10916574\n",
"claimed 10916574\n",
"verified_email 10916574\n",
"verified_primary_email 10916574\n",
"given_names 10886150\n",
"family_name 10601571\n",
"biography 348649\n",
"other_names 551482\n",
"urls 707687\n",
"primary_email 123851\n",
"other_emails 48306\n",
"keywords 646400\n",
"external_ids 1301959\n",
"education 2430233\n",
"employment 2665092\n",
"n_works 10916574\n",
"works_source 2721431\n",
"activation_date 10916574\n",
"last_update_date 10916574\n",
"n_doi 10916574\n",
"n_arxiv 10916574\n",
"n_pmc 10916574\n",
"n_other_pids 10916574\n",
"label 10916574\n",
"dtype: int64"
2021-03-23 12:13:04 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 9,
2021-03-23 12:13:04 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df.count()"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 10,
2021-03-23 12:13:04 +01:00
"metadata": {},
2021-03-23 19:03:37 +01:00
"outputs": [
{
"data": {
"text/plain": [
"count 10916574\n",
"unique 10916574\n",
"top 0000-0002-5454-7613\n",
"freq 1\n",
"Name: orcid, dtype: object"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 12:13:04 +01:00
"source": [
2021-03-23 19:03:37 +01:00
"df['orcid'].describe()"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"## Primary email"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 123851\n",
"unique 123848\n",
"top patrick.davey@monash.edu\n",
"freq 2\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Dupe emails"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6347224 maykin@owasp.org\n",
"7027865 patrick.davey@monash.edu\n",
"9529005 opercin@erbakan.edu.tr\n",
"Name: primary_email, dtype: object"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['primary_email'].dropna().loc[df['primary_email'].duplicated()]"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 22,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 12:13:04 +01:00
" <th>...</th>\n",
2021-03-18 17:43:00 +01:00
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
2021-03-23 12:13:04 +01:00
" <th>label</th>\n",
2021-03-23 19:03:37 +01:00
" <th>primary_email_domain</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>4450046</th>\n",
" <td>0000-0001-9855-1676</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>maykin</td>\n",
" <td>warasart</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 12:13:04 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>maykin@owasp.org</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
2021-03-23 12:13:04 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2020-10-23t17:51:51.925z</td>\n",
" <td>2021-01-01t15:00:52.053z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>owasp.org</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6347224</th>\n",
" <td>0000-0002-0836-2271</td>\n",
2021-03-23 12:13:04 +01:00
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>maykin</td>\n",
" <td>warasart</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>maykin@owasp.org</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>2020-09-15t04:43:55.709z</td>\n",
" <td>2020-09-15t05:17:28.509z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>owasp.org</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>2 rows × 25 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
2021-03-23 19:03:37 +01:00
"4450046 0000-0001-9855-1676 1 1 1 \n",
"6347224 0000-0002-0836-2271 1 1 1 \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" given_names family_name biography other_names urls primary_email \\\n",
"4450046 maykin warasart NaN NaN NaN maykin@owasp.org \n",
"6347224 maykin warasart NaN NaN NaN maykin@owasp.org \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" ... n_works works_source activation_date \\\n",
"4450046 ... 0 NaN 2020-10-23t17:51:51.925z \n",
"6347224 ... 0 NaN 2020-09-15t04:43:55.709z \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
"4450046 2021-01-01t15:00:52.053z 0 0 0 0 0 \n",
"6347224 2020-09-15t05:17:28.509z 0 0 0 0 0 \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" primary_email_domain \n",
"4450046 owasp.org \n",
"6347224 owasp.org \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"[2 rows x 25 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 22,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df[df['primary_email'] == 'maykin@owasp.org']"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 23,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 12:13:04 +01:00
" <th>...</th>\n",
2021-03-18 17:43:00 +01:00
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
2021-03-23 12:13:04 +01:00
" <th>label</th>\n",
2021-03-23 19:03:37 +01:00
" <th>primary_email_domain</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>6840791</th>\n",
" <td>0000-0002-2232-9638</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>osman</td>\n",
" <td>perçin</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>opercin@erbakan.edu.tr</td>\n",
2021-03-23 12:13:04 +01:00
" <td>...</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2015-01-12t13:47:55.549z</td>\n",
" <td>2020-01-27t07:38:24.269z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>erbakan.edu.tr</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9529005</th>\n",
" <td>0000-0003-0033-0918</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>osman</td>\n",
" <td>perçin</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>opercin@erbakan.edu.tr</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2015-10-13t05:47:12.014z</td>\n",
" <td>2020-12-25t13:52:03.976z</td>\n",
" <td>0</td>\n",
2021-03-23 12:13:04 +01:00
" <td>0</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>erbakan.edu.tr</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>2 rows × 25 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
2021-03-23 19:03:37 +01:00
"6840791 0000-0002-2232-9638 1 1 1 \n",
"9529005 0000-0003-0033-0918 1 1 1 \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" given_names family_name biography other_names urls \\\n",
"6840791 osman perçin NaN NaN NaN \n",
"9529005 osman perçin NaN NaN NaN \n",
2021-03-23 12:13:04 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" primary_email ... n_works works_source \\\n",
"6840791 opercin@erbakan.edu.tr ... 0 NaN \n",
"9529005 opercin@erbakan.edu.tr ... 0 NaN \n",
2021-03-23 12:13:04 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" activation_date last_update_date n_doi n_arxiv \\\n",
"6840791 2015-01-12t13:47:55.549z 2020-01-27t07:38:24.269z 0 0 \n",
"9529005 2015-10-13t05:47:12.014z 2020-12-25t13:52:03.976z 0 0 \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" n_pmc n_other_pids label primary_email_domain \n",
"6840791 0 0 0 erbakan.edu.tr \n",
"9529005 0 0 0 erbakan.edu.tr \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"[2 rows x 25 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 23,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df[df['primary_email'] == 'opercin@erbakan.edu.tr']"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 24,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 12:13:04 +01:00
" <th>...</th>\n",
2021-03-18 17:43:00 +01:00
" <th>n_works</th>\n",
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
2021-03-23 12:13:04 +01:00
" <th>label</th>\n",
2021-03-23 19:03:37 +01:00
" <th>primary_email_domain</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>944993</th>\n",
" <td>0000-0002-9158-1757</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>patrick</td>\n",
" <td>davey</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>patrick.davey@monash.edu</td>\n",
2021-03-23 12:13:04 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2019-05-09t23:01:02.170z</td>\n",
" <td>2019-08-20t03:00:17.844z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>monash.edu</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7027865</th>\n",
" <td>0000-0002-8774-0030</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>patrick</td>\n",
" <td>davey</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>patrick.davey@monash.edu</td>\n",
" <td>...</td>\n",
" <td>1</td>\n",
" <td>[crossref]</td>\n",
" <td>2018-09-11t10:47:10.997z</td>\n",
" <td>2021-02-09t06:21:44.138z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>monash.edu</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>2 rows × 25 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
2021-03-23 19:03:37 +01:00
"944993 0000-0002-9158-1757 1 1 1 \n",
"7027865 0000-0002-8774-0030 1 1 1 \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" given_names family_name biography other_names urls \\\n",
"944993 patrick davey NaN NaN NaN \n",
"7027865 patrick davey NaN NaN NaN \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" primary_email ... n_works works_source \\\n",
"944993 patrick.davey@monash.edu ... 0 NaN \n",
"7027865 patrick.davey@monash.edu ... 1 [crossref] \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" activation_date last_update_date n_doi n_arxiv \\\n",
"944993 2019-05-09t23:01:02.170z 2019-08-20t03:00:17.844z 0 0 \n",
"7027865 2018-09-11t10:47:10.997z 2021-02-09t06:21:44.138z 1 0 \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" n_pmc n_other_pids label primary_email_domain \n",
"944993 0 0 0 monash.edu \n",
"7027865 0 0 1 monash.edu \n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"[2 rows x 25 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 24,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df[df['primary_email'] == 'patrick.davey@monash.edu']"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 25,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-23 19:03:37 +01:00
"df['primary_email_domain'] = df['primary_email'].apply(lambda x: x.split('@')[1] if pd.notna(x) else x)"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 26,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-03-23 19:03:37 +01:00
"count 123851\n",
"unique 17089\n",
"top gmail.com\n",
"freq 26540\n",
"Name: primary_email_domain, dtype: object"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 26,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df['primary_email_domain'].describe()"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 27,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
2021-03-23 19:03:37 +01:00
" </tr>\n",
" <tr>\n",
" <th>primary_email_domain</th>\n",
" <th></th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>gmail.com</th>\n",
" <td>26540</td>\n",
" </tr>\n",
" <tr>\n",
" <th>hotmail.com</th>\n",
" <td>3769</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yahoo.com</th>\n",
" <td>2614</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163.com</th>\n",
" <td>2109</td>\n",
" </tr>\n",
" <tr>\n",
" <th>yuhs.ac</th>\n",
" <td>1132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>imean-biotech.com</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" </tr>\n",
" <tr>\n",
" <th>imec.msu.ru</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>imedea.uib-csic.es</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" </tr>\n",
" <tr>\n",
" <th>imes.uni-hannover.de</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" </tr>\n",
" <tr>\n",
" <th>zzuli.edu.cn</th>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>17089 rows × 1 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid\n",
"primary_email_domain \n",
"gmail.com 26540\n",
"hotmail.com 3769\n",
"yahoo.com 2614\n",
"163.com 2109\n",
"yuhs.ac 1132\n",
"... ...\n",
"imean-biotech.com 1\n",
"imec.msu.ru 1\n",
"imedea.uib-csic.es 1\n",
"imes.uni-hannover.de 1\n",
"zzuli.edu.cn 1\n",
2021-03-18 17:43:00 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"[17089 rows x 1 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 27,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"primary_emails = df[['primary_email_domain', 'orcid']].groupby('primary_email_domain').count().sort_values('orcid', ascending=False)\n",
"primary_emails"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 28,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
2021-03-23 10:20:23 +01:00
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"163.com",
"yuhs.ac",
"qq.com",
"outlook.com",
"126.com",
"bu.edu",
"usgs.gov",
"mail.ru",
"yahoo.com.br",
2021-03-23 19:03:37 +01:00
"usp.br",
2021-03-23 10:20:23 +01:00
"ua.pt",
"umich.edu",
"ust.hk",
"foxmail.com",
"uomustansiriyah.edu.iq",
"yandex.ru",
"uq.edu.au",
"ukr.net",
"unesp.br",
"ucl.ac.uk",
"ieee.org",
"naver.com",
2021-03-23 19:03:37 +01:00
"st-annes.ox.ac.uk",
"stcatz.ox.ac.uk",
2021-03-23 10:20:23 +01:00
"yahoo.fr",
"ucm.es",
"live.com"
],
"y": [
2021-03-23 19:03:37 +01:00
26540,
3769,
2614,
2109,
1132,
1056,
940,
762,
630,
2021-03-23 10:20:23 +01:00
584,
2021-03-23 19:03:37 +01:00
575,
458,
457,
300,
2021-03-23 10:20:23 +01:00
290,
2021-03-23 19:03:37 +01:00
277,
258,
247,
2021-03-23 10:20:23 +01:00
242,
2021-03-23 19:03:37 +01:00
235,
225,
218,
207,
2021-03-23 10:20:23 +01:00
204,
2021-03-23 19:03:37 +01:00
187,
184,
2021-03-23 10:20:23 +01:00
184,
172,
2021-03-23 19:03:37 +01:00
171,
163
2021-03-23 10:20:23 +01:00
]
2021-03-18 17:43:00 +01:00
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
2021-03-23 10:20:23 +01:00
"text": "Top 30 email domains"
2021-03-18 17:43:00 +01:00
},
"xaxis": {
"range": [
2021-03-23 10:20:23 +01:00
-0.5,
29.5
2021-03-18 17:43:00 +01:00
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"15d3677b-72ef-4927-9ec5-b34a376fc263\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"15d3677b-72ef-4927-9ec5-b34a376fc263\")) { Plotly.newPlot( \"15d3677b-72ef-4927-9ec5-b34a376fc263\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"163.com\", \"yuhs.ac\", \"qq.com\", \"outlook.com\", \"126.com\", \"bu.edu\", \"usgs.gov\", \"mail.ru\", \"yahoo.com.br\", \"usp.br\", \"ua.pt\", \"umich.edu\", \"ust.hk\", \"foxmail.com\", \"uomustansiriyah.edu.iq\", \"yandex.ru\", \"uq.edu.au\", \"ukr.net\", \"unesp.br\", \"ucl.ac.uk\", \"ieee.org\", \"naver.com\", \"st-annes.ox.ac.uk\", \"stcatz.ox.ac.uk\", \"yahoo.fr\", \"ucm.es\", \"live.com\"], \"y\": [26540, 3769, 2614, 2109, 1132, 1056, 940, 762, 630, 584, 575, 458, 457, 300, 290, 277, 258, 247, 242, 235, 225, 218, 207, 204, 187, 184, 184, 172, 171, 163]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, 
\"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46
2021-03-18 17:43:00 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('15d3677b-72ef-4927-9ec5-b34a376fc263');\n",
2021-03-18 17:43:00 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2021-03-23 09:35:35 +01:00
"set_top_n(30)\n",
2021-03-18 17:43:00 +01:00
"data = [\n",
" go.Bar(\n",
2021-03-23 10:20:43 +01:00
" x=primary_emails.sort_values(by=['orcid'], ascending=False).index[:TOP_N],\n",
" y=primary_emails.sort_values(by=['orcid'], ascending=False)['orcid'][:TOP_N]\n",
2021-03-18 17:43:00 +01:00
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
2021-03-23 09:35:35 +01:00
" title='Top %s email domains' % TOP_N,\n",
2021-03-18 17:43:00 +01:00
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other emails"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 29,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
"def extract_email_domains(lst):\n",
" res = []\n",
" for email in lst:\n",
" res.append(email.split('@')[1])\n",
" return res"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 30,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
"df['other_email_domains'] = df['other_emails'].apply(lambda x: extract_email_domains(x) if isinstance(x, list) else x)"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 31,
2021-03-22 19:08:20 +01:00
"metadata": {
"scrolled": true
},
2021-03-18 17:43:00 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 19:03:37 +01:00
" <th>...</th>\n",
2021-03-18 17:43:00 +01:00
" <th>works_source</th>\n",
2021-03-23 19:03:37 +01:00
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-18 17:43:00 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>34</th>\n",
2021-03-23 19:03:37 +01:00
" <td>0000-0002-5774-8947</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[omah m. williams - duncan]</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2014-03-07t04:34:39.598z</td>\n",
" <td>2019-05-21t17:08:12.202z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[gmail.com]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>1199</th>\n",
" <td>0000-0003-2877-5492</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>aliasghar</td>\n",
" <td>khosroabadi</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>khosroedc@yahoo.com</td>\n",
" <td>...</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2018-01-19t13:40:29.874z</td>\n",
" <td>2019-12-11t02:19:08.160z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>yahoo.com</td>\n",
" <td>[medsab.ac.ir, gmail.com]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>1995</th>\n",
" <td>0000-0001-8004-5054</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>angiola</td>\n",
" <td>orlando</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>angiola.orlando@mib.infn.it</td>\n",
" <td>...</td>\n",
" <td>[angiola orlando, crossref]</td>\n",
" <td>2015-08-31t09:12:02.349z</td>\n",
" <td>2020-06-22t14:22:31.786z</td>\n",
" <td>59</td>\n",
" <td>2</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
2021-03-23 19:03:37 +01:00
" <td>53</td>\n",
" <td>1</td>\n",
" <td>mib.infn.it</td>\n",
" <td>[ge.infn.it]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>2323</th>\n",
" <td>0000-0003-3048-4504</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>apichat</td>\n",
" <td>saejio</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>[scopus - elsevier]</td>\n",
" <td>2016-03-06t08:54:15.121z</td>\n",
" <td>2020-08-28t08:31:15.790z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[eat.kmutnb.ac.th]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>4461</th>\n",
" <td>0000-0001-9961-9732</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>chunfeng</td>\n",
" <td>yun</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>sallyycf@163.com</td>\n",
" <td>...</td>\n",
" <td>[multidisciplinary digital publishing institut...</td>\n",
" <td>2016-11-22t07:55:23.863z</td>\n",
" <td>2019-11-26t02:29:35.104z</td>\n",
" <td>5</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
2021-03-23 19:03:37 +01:00
" <td>9</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>163.com</td>\n",
" <td>[pku.edu.cn]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>5 rows × 26 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
2021-03-23 19:03:37 +01:00
"34 0000-0002-5774-8947 1 1 1 \n",
"1199 0000-0003-2877-5492 1 1 0 \n",
"1995 0000-0001-8004-5054 1 1 1 \n",
"2323 0000-0003-3048-4504 1 1 1 \n",
"4461 0000-0001-9961-9732 1 1 1 \n",
"\n",
" given_names family_name biography other_names urls \\\n",
"34 NaN NaN NaN [omah m. williams - duncan] NaN \n",
"1199 aliasghar khosroabadi NaN NaN NaN \n",
"1995 angiola orlando NaN NaN NaN \n",
"2323 apichat saejio NaN NaN NaN \n",
"4461 chunfeng yun NaN NaN NaN \n",
"\n",
" primary_email ... \\\n",
"34 NaN ... \n",
"1199 khosroedc@yahoo.com ... \n",
"1995 angiola.orlando@mib.infn.it ... \n",
"2323 NaN ... \n",
"4461 sallyycf@163.com ... \n",
"\n",
" works_source \\\n",
"34 NaN \n",
"1199 [scopus - elsevier] \n",
"1995 [angiola orlando, crossref] \n",
"2323 [scopus - elsevier] \n",
"4461 [multidisciplinary digital publishing institut... \n",
"\n",
" activation_date last_update_date n_doi n_arxiv n_pmc \\\n",
"34 2014-03-07t04:34:39.598z 2019-05-21t17:08:12.202z 0 0 0 \n",
"1199 2018-01-19t13:40:29.874z 2019-12-11t02:19:08.160z 0 0 0 \n",
"1995 2015-08-31t09:12:02.349z 2020-06-22t14:22:31.786z 59 2 0 \n",
"2323 2016-03-06t08:54:15.121z 2020-08-28t08:31:15.790z 2 0 0 \n",
"4461 2016-11-22t07:55:23.863z 2019-11-26t02:29:35.104z 5 0 9 \n",
"\n",
" n_other_pids label primary_email_domain other_email_domains \n",
"34 0 0 NaN [gmail.com] \n",
"1199 1 1 yahoo.com [medsab.ac.ir, gmail.com] \n",
"1995 53 1 mib.infn.it [ge.infn.it] \n",
"2323 4 0 NaN [eat.kmutnb.ac.th] \n",
"4461 0 1 163.com [pku.edu.cn] \n",
"\n",
"[5 rows x 26 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 31,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[df['other_email_domains'].notna()].head()"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 32,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-22 19:08:20 +01:00
"df['n_emails'] = df['other_emails'].str.len()"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 33,
2021-03-18 17:43:00 +01:00
"metadata": {},
2021-03-23 10:20:43 +01:00
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"0000-0003-4171-3835",
"0000-0001-6239-2968",
"0000-0003-2151-4089",
2021-03-23 19:03:37 +01:00
"0000-0003-2290-2817",
2021-03-23 10:20:43 +01:00
"0000-0001-9084-3156",
"0000-0001-6349-1044",
"0000-0002-2085-1908",
2021-03-23 19:03:37 +01:00
"0000-0003-4147-212x",
"0000-0002-8565-194x",
"0000-0002-7396-1561",
"0000-0002-9821-8424",
2021-03-23 10:20:43 +01:00
"0000-0003-4327-6827",
2021-03-23 19:03:37 +01:00
"0000-0001-9311-0687",
2021-03-23 10:20:43 +01:00
"0000-0003-0391-3430",
2021-03-23 19:03:37 +01:00
"0000-0002-0776-9547",
2021-03-23 10:20:43 +01:00
"0000-0003-2657-8225",
2021-03-23 19:03:37 +01:00
"0000-0001-5548-8259",
"0000-0003-0671-1543",
2021-03-23 10:20:43 +01:00
"0000-0003-1502-3910",
2021-03-23 19:03:37 +01:00
"0000-0003-4685-5621",
"0000-0002-3165-132x",
"0000-0001-8420-9204",
"0000-0002-1929-6054",
"0000-0002-8390-8238",
2021-03-23 10:20:43 +01:00
"0000-0002-9599-6909",
"0000-0002-5341-6531",
"0000-0003-4499-7300",
"0000-0002-1615-8633",
2021-03-23 19:03:37 +01:00
"0000-0002-6206-4638",
"0000-0003-3405-355x"
2021-03-23 10:20:43 +01:00
],
"y": [
12,
9,
7,
7,
6,
6,
6,
6,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
2021-03-23 19:03:37 +01:00
5,
5,
2021-03-23 10:20:43 +01:00
4,
4
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 ORCiD by email"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"af09fc26-d772-4201-a866-00d6c313371e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"af09fc26-d772-4201-a866-00d6c313371e\")) { Plotly.newPlot( \"af09fc26-d772-4201-a866-00d6c313371e\", [{\"type\": \"bar\", \"x\": [\"0000-0003-4171-3835\", \"0000-0001-6239-2968\", \"0000-0003-2151-4089\", \"0000-0003-2290-2817\", \"0000-0001-9084-3156\", \"0000-0001-6349-1044\", \"0000-0002-2085-1908\", \"0000-0003-4147-212x\", \"0000-0002-8565-194x\", \"0000-0002-7396-1561\", \"0000-0002-9821-8424\", \"0000-0003-4327-6827\", \"0000-0001-9311-0687\", \"0000-0003-0391-3430\", \"0000-0002-0776-9547\", \"0000-0003-2657-8225\", \"0000-0001-5548-8259\", \"0000-0003-0671-1543\", \"0000-0003-1502-3910\", \"0000-0003-4685-5621\", \"0000-0002-3165-132x\", \"0000-0001-8420-9204\", \"0000-0002-1929-6054\", \"0000-0002-8390-8238\", \"0000-0002-9599-6909\", \"0000-0002-5341-6531\", \"0000-0003-4499-7300\", \"0000-0002-1615-8633\", \"0000-0002-6206-4638\", \"0000-0003-3405-355x\"], \"y\": [12.0, 9.0, 7.0, 7.0, 6.0, 6.0, 6.0, 6.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": 
[{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.666
2021-03-23 10:20:43 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('af09fc26-d772-4201-a866-00d6c313371e');\n",
2021-03-23 10:20:43 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2021-03-18 17:43:00 +01:00
"source": [
2021-03-23 10:20:23 +01:00
"set_top_n(30)\n",
"\n",
"# Sort the full frame a single time and keep only the rows being plotted;\n",
"# both bar axes are read from this one slice instead of re-sorting `df`\n",
"# separately for the x and the y values.\n",
"top_by_n_emails = df.sort_values('n_emails', ascending=False).head(TOP_N)\n",
"\n",
"data = [\n",
"    go.Bar(\n",
"        x=top_by_n_emails['orcid'],\n",
"        y=top_by_n_emails['n_emails']\n",
"    )\n",
"]\n",
"\n",
"# TOP_RANGE is set by set_top_n() and limits the x axis to the TOP_N bars.\n",
"layout = go.Layout(\n",
"    title='Top %s ORCiD by email' % TOP_N,\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 34,
2021-03-22 19:08:20 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Explode the per-profile lists in 'other_email_domains' into one row per\n",
"# domain, count the non-null 'orcid' entries for each domain, and order the\n",
"# domains from the most to the least frequent.\n",
"grouped_other_emails = (\n",
"    df[['orcid', 'other_email_domains']]\n",
"    .explode('other_email_domains')\n",
"    .reset_index(drop=True)\n",
"    .groupby('other_email_domains')\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 35,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"gmail.com",
"hotmail.com",
"yahoo.com",
"qq.com",
2021-03-23 19:03:37 +01:00
"163.com",
2021-03-18 17:43:00 +01:00
"outlook.com",
"126.com",
"usp.br",
"ieee.org",
"mail.ru",
"yahoo.com.br",
"unesp.br",
"sbs.ox.ac.uk",
"yuhs.ac",
"naver.com",
"icloud.com",
"foxmail.com",
2021-03-23 19:03:37 +01:00
"uq.edu.au",
"ua.pt",
2021-03-18 17:43:00 +01:00
"cam.ac.uk",
2021-03-23 19:03:37 +01:00
"imperial.ac.uk",
2021-03-18 17:43:00 +01:00
"ukr.net",
"law.ox.ac.uk",
"mit.edu",
2021-03-23 19:03:37 +01:00
"stanford.edu",
2021-03-18 17:43:00 +01:00
"monash.edu",
"ucl.ac.uk",
"education.ox.ac.uk",
"ucm.es",
"conted.ox.ac.uk"
],
"y": [
2021-03-23 19:03:37 +01:00
11116,
1541,
1295,
779,
774,
425,
260,
236,
224,
149,
2021-03-18 17:43:00 +01:00
147,
2021-03-23 19:03:37 +01:00
141,
2021-03-18 17:43:00 +01:00
136,
2021-03-23 19:03:37 +01:00
133,
2021-03-18 17:43:00 +01:00
130,
2021-03-23 19:03:37 +01:00
118,
96,
94,
89,
84,
77,
2021-03-18 17:43:00 +01:00
76,
75,
74,
2021-03-23 19:03:37 +01:00
71,
70,
68,
2021-03-18 17:43:00 +01:00
67,
66,
64
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 other email domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"f62e82ae-82a4-402b-9759-c47003e4a6bc\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"f62e82ae-82a4-402b-9759-c47003e4a6bc\")) { Plotly.newPlot( \"f62e82ae-82a4-402b-9759-c47003e4a6bc\", [{\"type\": \"bar\", \"x\": [\"gmail.com\", \"hotmail.com\", \"yahoo.com\", \"qq.com\", \"163.com\", \"outlook.com\", \"126.com\", \"usp.br\", \"ieee.org\", \"mail.ru\", \"yahoo.com.br\", \"unesp.br\", \"sbs.ox.ac.uk\", \"yuhs.ac\", \"naver.com\", \"icloud.com\", \"foxmail.com\", \"uq.edu.au\", \"ua.pt\", \"cam.ac.uk\", \"imperial.ac.uk\", \"ukr.net\", \"law.ox.ac.uk\", \"mit.edu\", \"stanford.edu\", \"monash.edu\", \"ucl.ac.uk\", \"education.ox.ac.uk\", \"ucm.es\", \"conted.ox.ac.uk\"], \"y\": [11116, 1541, 1295, 779, 774, 425, 260, 236, 224, 149, 147, 141, 136, 133, 130, 118, 96, 94, 89, 84, 77, 76, 75, 74, 71, 70, 68, 67, 66, 64]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], 
[0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"
2021-03-18 17:43:00 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('f62e82ae-82a4-402b-9759-c47003e4a6bc');\n",
2021-03-18 17:43:00 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2021-03-23 09:35:35 +01:00
"set_top_n(30)\n",
"\n",
"# Sort once and keep only the rows being plotted, instead of re-sorting the\n",
"# whole frame separately for the x and the y values.\n",
"top_other_domains = (\n",
"    grouped_other_emails\n",
"    .sort_values(by=['orcid'], ascending=False)\n",
"    .head(TOP_N)\n",
")\n",
"\n",
"# The frame is indexed by domain; its 'orcid' column holds the per-domain count.\n",
"data = [\n",
"    go.Bar(\n",
"        x=top_other_domains.index,\n",
"        y=top_other_domains['orcid']\n",
"    )\n",
"]\n",
"\n",
"# TOP_RANGE is set by set_top_n() and limits the x axis to the TOP_N bars.\n",
"layout = go.Layout(\n",
"    title='Top %s other email domains' % TOP_N,\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Email speculation"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 36,
2021-03-22 19:08:20 +01:00
"metadata": {
"scrolled": true
},
2021-03-18 17:43:00 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-23 19:03:37 +01:00
" <th>...</th>\n",
" <th>activation_date</th>\n",
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-18 17:43:00 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
2021-03-22 19:08:20 +01:00
" <th>n_emails</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>34</th>\n",
" <td>0000-0002-5774-8947</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[omah m. williams - duncan]</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2014-03-07t04:34:39.598z</td>\n",
" <td>2019-05-21t17:08:12.202z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[gmail.com]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>2323</th>\n",
" <td>0000-0003-3048-4504</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>apichat</td>\n",
" <td>saejio</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2016-03-06t08:54:15.121z</td>\n",
" <td>2020-08-28t08:31:15.790z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[eat.kmutnb.ac.th]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>7622</th>\n",
" <td>0000-0002-5612-7444</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>friederike m.</td>\n",
" <td>hesse</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[midwifery care - milla hebammenpraxis, http:...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2017-06-10t07:45:11.387z</td>\n",
" <td>2017-06-10t07:55:03.455z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[gmail.com, dghwi.de]</td>\n",
" <td>2.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>7956</th>\n",
" <td>0000-0002-8943-0538</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>geo</td>\n",
" <td>sunny</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2019-11-30t14:08:11.221z</td>\n",
" <td>2020-05-15t09:06:25.637z</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>[students.cutn.ac.in]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10508</th>\n",
" <td>0000-0002-4022-0580</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>jean carlos</td>\n",
" <td>da silva gomes</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[currículo lattes, http://lattes.cnpq.br/0026...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2017-05-26t19:09:33.432z</td>\n",
" <td>2020-06-02t00:23:14.020z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[letras.ufrj.br]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
2021-03-22 19:08:20 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10915002</th>\n",
" <td>0000-0002-3715-3866</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>joanna</td>\n",
" <td>korybut-orlowska</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[joanna gołębiewska]</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2017-04-27t10:08:48.102z</td>\n",
" <td>2020-12-08t09:44:59.088z</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[gmail.com]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10915305</th>\n",
" <td>0000-0003-1925-0141</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>marco</td>\n",
" <td>ferretti</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2015-02-23t10:29:00.543z</td>\n",
" <td>2020-11-30t21:58:07.439z</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[itabc.cnr.it]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10915495</th>\n",
" <td>0000-0001-5526-3017</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>nadia</td>\n",
" <td>yacoubi</td>\n",
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>2015-03-10t16:45:31.974z</td>\n",
" <td>2020-12-11t00:00:01.060z</td>\n",
2021-03-18 17:43:00 +01:00
" <td>3</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[evonik.com]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10915820</th>\n",
" <td>0000-0002-9902-7953</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>s m mahmudul</td>\n",
" <td>hasan</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2018-01-26t02:18:25.551z</td>\n",
" <td>2020-11-24t05:37:24.167z</td>\n",
" <td>7</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[gmail.com]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916306</th>\n",
" <td>0000-0002-5126-5127</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>andonis</td>\n",
" <td>neophytou</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2017-03-30t17:08:15.383z</td>\n",
" <td>2020-12-09t16:16:50.762z</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>[ucy.ac.cy]</td>\n",
2021-03-22 19:08:20 +01:00
" <td>1.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>19692 rows × 27 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
2021-03-23 19:03:37 +01:00
"34 0000-0002-5774-8947 1 1 \n",
"2323 0000-0003-3048-4504 1 1 \n",
"7622 0000-0002-5612-7444 1 1 \n",
"7956 0000-0002-8943-0538 1 1 \n",
"10508 0000-0002-4022-0580 1 1 \n",
2021-03-18 17:43:00 +01:00
"... ... ... ... \n",
2021-03-23 19:03:37 +01:00
"10915002 0000-0002-3715-3866 1 1 \n",
"10915305 0000-0003-1925-0141 1 1 \n",
"10915495 0000-0001-5526-3017 1 1 \n",
"10915820 0000-0002-9902-7953 1 1 \n",
"10916306 0000-0002-5126-5127 1 1 \n",
"\n",
" verified_primary_email given_names family_name biography \\\n",
"34 1 NaN NaN NaN \n",
"2323 1 apichat saejio NaN \n",
"7622 1 friederike m. hesse NaN \n",
"7956 1 geo sunny NaN \n",
"10508 1 jean carlos da silva gomes NaN \n",
"... ... ... ... ... \n",
"10915002 1 joanna korybut-orlowska NaN \n",
"10915305 1 marco ferretti NaN \n",
"10915495 1 nadia yacoubi NaN \n",
"10915820 1 s m mahmudul hasan NaN \n",
"10916306 1 andonis neophytou NaN \n",
"\n",
" other_names \\\n",
"34 [omah m. williams - duncan] \n",
"2323 NaN \n",
"7622 NaN \n",
"7956 NaN \n",
"10508 NaN \n",
"... ... \n",
"10915002 [joanna gołębiewska] \n",
"10915305 NaN \n",
"10915495 NaN \n",
"10915820 NaN \n",
"10916306 NaN \n",
2021-03-18 17:43:00 +01:00
"\n",
" urls primary_email \\\n",
2021-03-23 19:03:37 +01:00
"34 NaN NaN \n",
"2323 NaN NaN \n",
"7622 [[midwifery care - milla hebammenpraxis, http:... NaN \n",
"7956 NaN NaN \n",
"10508 [[currículo lattes, http://lattes.cnpq.br/0026... NaN \n",
2021-03-18 17:43:00 +01:00
"... ... ... \n",
2021-03-23 19:03:37 +01:00
"10915002 NaN NaN \n",
"10915305 NaN NaN \n",
"10915495 NaN NaN \n",
"10915820 NaN NaN \n",
"10916306 NaN NaN \n",
"\n",
" ... activation_date last_update_date n_doi \\\n",
"34 ... 2014-03-07t04:34:39.598z 2019-05-21t17:08:12.202z 0 \n",
"2323 ... 2016-03-06t08:54:15.121z 2020-08-28t08:31:15.790z 2 \n",
"7622 ... 2017-06-10t07:45:11.387z 2017-06-10t07:55:03.455z 0 \n",
"7956 ... 2019-11-30t14:08:11.221z 2020-05-15t09:06:25.637z 1 \n",
"10508 ... 2017-05-26t19:09:33.432z 2020-06-02t00:23:14.020z 2 \n",
"... ... ... ... ... \n",
"10915002 ... 2017-04-27t10:08:48.102z 2020-12-08t09:44:59.088z 6 \n",
"10915305 ... 2015-02-23t10:29:00.543z 2020-11-30t21:58:07.439z 7 \n",
"10915495 ... 2015-03-10t16:45:31.974z 2020-12-11t00:00:01.060z 3 \n",
"10915820 ... 2018-01-26t02:18:25.551z 2020-11-24t05:37:24.167z 7 \n",
"10916306 ... 2017-03-30t17:08:15.383z 2020-12-09t16:16:50.762z 2 \n",
"\n",
" n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"34 0 0 0 0 NaN \n",
"2323 0 0 4 0 NaN \n",
"7622 0 0 0 0 NaN \n",
"7956 0 0 0 1 NaN \n",
"10508 0 0 2 1 NaN \n",
"... ... ... ... ... ... \n",
"10915002 0 0 0 0 NaN \n",
"10915305 0 0 9 1 NaN \n",
"10915495 0 0 0 1 NaN \n",
"10915820 0 2 7 1 NaN \n",
"10916306 0 0 3 0 NaN \n",
"\n",
" other_email_domains n_emails \n",
"34 [gmail.com] 1.0 \n",
"2323 [eat.kmutnb.ac.th] 1.0 \n",
"7622 [gmail.com, dghwi.de] 2.0 \n",
"7956 [students.cutn.ac.in] 1.0 \n",
"10508 [letras.ufrj.br] 1.0 \n",
"... ... ... \n",
"10915002 [gmail.com] 1.0 \n",
"10915305 [itabc.cnr.it] 1.0 \n",
"10915495 [evonik.com] 1.0 \n",
"10915820 [gmail.com] 1.0 \n",
"10916306 [ucy.ac.cy] 1.0 \n",
"\n",
"[19692 rows x 27 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 36,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profiles that list other emails but have no primary email.\n",
"df.loc[df['other_emails'].notna() & df['primary_email'].isna()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2021-03-22 19:08:20 +01:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## URLs"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 37,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
"def extract_url_domains(lst):\n",
"    \"\"\"Map every URL entry in ``lst`` to its registered domain.\n",
"\n",
"    Each entry is indexed positionally: ``entry[0]`` is a string describing\n",
"    the URL and ``entry[1]`` is the URL itself. Returns a list with one\n",
"    ``registered_domain`` (as computed by ``tldextract.extract``) per entry,\n",
"    in the same order as ``lst``.\n",
"    \"\"\"\n",
"    return [tldextract.extract(entry[1]).registered_domain for entry in lst]"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 38,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Only list-valued cells hold URL entries; any other value is passed through unchanged.\n",
"df['url_domains'] = df['urls'].apply(\n",
"    lambda urls: extract_url_domains(urls) if isinstance(urls, list) else urls\n",
")"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 39,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
2021-03-22 19:08:20 +01:00
" <th>...</th>\n",
2021-03-23 19:03:37 +01:00
" <th>last_update_date</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-18 17:43:00 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
2021-03-22 19:08:20 +01:00
" <th>n_emails</th>\n",
2021-03-18 17:43:00 +01:00
" <th>url_domains</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>9</th>\n",
" <td>0000-0001-8718-0056</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[飛資得]</td>\n",
" <td>[[link1, http://orcid.flysheetmed.info], [ntu ...</td>\n",
" <td>ericlin.flysheet@gmail.com</td>\n",
" <td>...</td>\n",
" <td>2019-10-11t17:51:12.473z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>gmail.com</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[flysheetmed.info, ntu.edu.tw]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>41</th>\n",
" <td>0000-0002-7845-4016</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[publication profile, http://publications.lib...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2016-06-06t15:29:36.952z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[chalmers.se]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>59</th>\n",
" <td>0000-0003-0967-6157</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[徐興慶]</td>\n",
" <td>[[ntu researcher profile, http://ah.ntu.edu.tw...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>2017-03-10t07:30:04.778z</td>\n",
" <td>12</td>\n",
2021-03-18 17:43:00 +01:00
" <td>0</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[ntu.edu.tw, ntu.edu.tw]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>149</th>\n",
" <td>0000-0002-8015-3781</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>alejandro</td>\n",
" <td>ossorio</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[web de la universidad carlos iii de madrid, ...</td>\n",
" <td>aossorio@di.uc3m.es</td>\n",
" <td>...</td>\n",
" <td>2019-07-04t08:47:12.005z</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>di.uc3m.es</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[uc3m.es]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>155</th>\n",
" <td>0000-0003-3444-936x</td>\n",
2021-03-18 17:43:00 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>alessandra</td>\n",
" <td>caravale</td>\n",
" <td>archeologa, con laurea in metodologia e tecnic...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[isma- cnr, http://www.isma.cnr.it/?page_id=1...</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
2021-03-22 19:08:20 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>2020-05-14t15:54:38.235z</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>14</td>\n",
" <td>1</td>\n",
2021-03-18 17:43:00 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[cnr.it]</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>5 rows × 28 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid claimed verified_email verified_primary_email \\\n",
"9 0000-0001-8718-0056 1 1 1 \n",
"41 0000-0002-7845-4016 1 1 1 \n",
"59 0000-0003-0967-6157 1 1 1 \n",
"149 0000-0002-8015-3781 1 1 1 \n",
"155 0000-0003-3444-936x 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"9 NaN NaN \n",
"41 NaN NaN \n",
"59 NaN NaN \n",
"149 alejandro ossorio \n",
"155 alessandra caravale \n",
"\n",
" biography other_names \\\n",
"9 NaN [飛資得] \n",
"41 NaN NaN \n",
"59 NaN [徐興慶] \n",
"149 NaN NaN \n",
"155 archeologa, con laurea in metodologia e tecnic... NaN \n",
"\n",
" urls \\\n",
"9 [[link1, http://orcid.flysheetmed.info], [ntu ... \n",
"41 [[publication profile, http://publications.lib... \n",
"59 [[ntu researcher profile, http://ah.ntu.edu.tw... \n",
"149 [[web de la universidad carlos iii de madrid, ... \n",
"155 [[isma- cnr, http://www.isma.cnr.it/?page_id=1... \n",
"\n",
" primary_email ... last_update_date n_doi n_arxiv \\\n",
"9 ericlin.flysheet@gmail.com ... 2019-10-11t17:51:12.473z 0 0 \n",
"41 NaN ... 2016-06-06t15:29:36.952z 0 0 \n",
"59 NaN ... 2017-03-10t07:30:04.778z 12 0 \n",
"149 aossorio@di.uc3m.es ... 2019-07-04t08:47:12.005z 0 0 \n",
"155 NaN ... 2020-05-14t15:54:38.235z 7 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain other_email_domains \\\n",
"9 0 6 1 gmail.com NaN \n",
"41 0 0 0 NaN NaN \n",
"59 0 4 1 NaN NaN \n",
"149 0 0 0 di.uc3m.es NaN \n",
"155 0 14 1 NaN NaN \n",
"\n",
" n_emails url_domains \n",
"9 NaN [flysheetmed.info, ntu.edu.tw] \n",
"41 NaN [chalmers.se] \n",
"59 NaN [ntu.edu.tw, ntu.edu.tw] \n",
"149 NaN [uc3m.es] \n",
"155 NaN [cnr.it] \n",
"\n",
"[5 rows x 28 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 39,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first profiles for which URL domains were extracted.\n",
"df.loc[df['url_domains'].notna()].head()"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 40,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-22 19:08:20 +01:00
"df['n_urls'] = df['url_domains'].str.len()"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 41,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
2021-03-22 19:08:20 +01:00
" <th>n_urls</th>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>257375</th>\n",
" <td>0000-0002-1234-835x</td>\n",
2021-03-22 19:08:20 +01:00
" <td>219.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>3630067</th>\n",
2021-03-22 19:08:20 +01:00
" <td>0000-0001-7478-4539</td>\n",
" <td>174.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>5196089</th>\n",
2021-03-22 19:08:20 +01:00
" <td>0000-0002-7392-3792</td>\n",
" <td>169.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10696059</th>\n",
2021-03-22 19:08:20 +01:00
" <td>0000-0002-6938-9638</td>\n",
" <td>152.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>6868932</th>\n",
2021-03-22 19:08:20 +01:00
" <td>0000-0002-5710-4041</td>\n",
" <td>114.0</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
2021-03-22 19:08:20 +01:00
" <td>...</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916569</th>\n",
" <td>0000-0001-5692-7639</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916570</th>\n",
" <td>0000-0003-1539-0999</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916571</th>\n",
" <td>0000-0003-2858-5509</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916572</th>\n",
" <td>0000-0003-2438-9500</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916573</th>\n",
" <td>0000-0003-4119-4772</td>\n",
2021-03-22 19:08:20 +01:00
" <td>NaN</td>\n",
2021-03-18 17:43:00 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>10916574 rows × 2 columns</p>\n",
2021-03-18 17:43:00 +01:00
"</div>"
],
"text/plain": [
2021-03-22 19:08:20 +01:00
" orcid n_urls\n",
2021-03-23 19:03:37 +01:00
"257375 0000-0002-1234-835x 219.0\n",
"3630067 0000-0001-7478-4539 174.0\n",
"5196089 0000-0002-7392-3792 169.0\n",
"10696059 0000-0002-6938-9638 152.0\n",
"6868932 0000-0002-5710-4041 114.0\n",
2021-03-22 19:08:20 +01:00
"... ... ...\n",
2021-03-23 19:03:37 +01:00
"10916569 0000-0001-5692-7639 NaN\n",
"10916570 0000-0003-1539-0999 NaN\n",
"10916571 0000-0003-2858-5509 NaN\n",
"10916572 0000-0003-2438-9500 NaN\n",
"10916573 0000-0003-4119-4772 NaN\n",
2021-03-22 19:08:20 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"[10916574 rows x 2 columns]"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 41,
2021-03-18 17:43:00 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-22 19:08:20 +01:00
"# Project the two columns first so the sort does not have to reorder every column of df;\n",
"# the resulting rows and their order are the same as sorting the full frame and then selecting.\n",
"df[['orcid', 'n_urls']].sort_values('n_urls', ascending=False)"
2021-03-18 17:43:00 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 42,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
2021-03-23 19:03:37 +01:00
"0000-0002-1234-835x",
2021-03-23 09:47:47 +01:00
"0000-0001-7478-4539",
"0000-0002-7392-3792",
"0000-0002-6938-9638",
"0000-0002-5710-4041",
2021-03-23 19:03:37 +01:00
"0000-0003-2450-090x",
2021-03-23 09:47:47 +01:00
"0000-0002-3920-7389",
2021-03-23 19:03:37 +01:00
"0000-0001-5384-0001",
2021-03-23 09:47:47 +01:00
"0000-0002-6689-4129",
"0000-0002-4621-5571",
"0000-0002-7754-8889",
2021-03-23 19:03:37 +01:00
"0000-0001-9131-1266",
2021-03-23 09:47:47 +01:00
"0000-0002-5250-1144",
"0000-0002-9025-8632",
"0000-0002-7456-3848",
"0000-0003-0176-1293",
"0000-0003-0321-7339",
"0000-0002-8493-0402",
"0000-0002-9965-2425",
"0000-0001-8873-6677",
"0000-0002-3997-5070",
"0000-0002-1856-6905",
"0000-0002-4316-1467",
2021-03-23 19:03:37 +01:00
"0000-0002-4062-3603",
2021-03-23 09:47:47 +01:00
"0000-0001-5880-7091",
"0000-0003-0594-2462",
2021-03-23 19:03:37 +01:00
"0000-0003-1524-6268",
"0000-0002-0752-7513",
2021-03-23 09:47:47 +01:00
"0000-0003-2593-7134",
"0000-0002-1298-5252",
"0000-0003-2383-8386",
2021-03-23 19:03:37 +01:00
"0000-0003-1761-3842",
2021-03-23 09:47:47 +01:00
"0000-0003-3546-2312",
"0000-0002-2886-9248",
2021-03-23 19:03:37 +01:00
"0000-0003-4948-9268",
2021-03-23 09:47:47 +01:00
"0000-0003-2183-8112",
2021-03-23 19:03:37 +01:00
"0000-0002-1929-6054",
2021-03-23 09:47:47 +01:00
"0000-0003-2407-3557",
"0000-0001-7133-6896",
"0000-0002-9276-6921",
2021-03-23 19:03:37 +01:00
"0000-0002-4305-4215",
2021-03-23 09:47:47 +01:00
"0000-0003-1484-6958",
2021-03-23 19:03:37 +01:00
"0000-0002-7568-3403",
2021-03-23 09:47:47 +01:00
"0000-0002-4004-6666",
2021-03-23 19:03:37 +01:00
"0000-0003-0796-0234",
2021-03-23 09:47:47 +01:00
"0000-0002-8208-0897",
"0000-0003-4993-5555",
2021-03-23 19:03:37 +01:00
"0000-0002-8116-9611",
2021-03-23 09:47:47 +01:00
"0000-0003-0930-6121",
2021-03-23 19:03:37 +01:00
"0000-0002-9071-5450",
"0000-0002-8122-879x",
2021-03-23 09:47:47 +01:00
"0000-0002-3277-9659",
2021-03-23 19:03:37 +01:00
"0000-0001-9559-1103",
2021-03-23 09:47:47 +01:00
"0000-0003-2862-6315",
"0000-0002-2000-8339",
"0000-0001-5300-4601",
"0000-0002-6547-0172",
"0000-0003-4808-6619",
2021-03-23 19:03:37 +01:00
"0000-0002-5139-2660",
2021-03-23 09:47:47 +01:00
"0000-0002-6254-8683",
"0000-0002-0971-9375",
"0000-0003-3933-0229",
2021-03-23 19:03:37 +01:00
"0000-0003-1585-1134",
2021-03-23 09:47:47 +01:00
"0000-0003-0694-1154",
2021-03-23 19:03:37 +01:00
"0000-0002-4659-5391",
2021-03-23 09:47:47 +01:00
"0000-0001-6461-2573",
2021-03-23 19:03:37 +01:00
"0000-0001-6783-2037",
"0000-0003-4501-3756",
"0000-0002-2916-2893",
2021-03-23 09:47:47 +01:00
"0000-0001-5549-6822",
"0000-0003-4326-9336",
2021-03-23 19:03:37 +01:00
"0000-0001-8978-4830",
2021-03-23 09:47:47 +01:00
"0000-0002-8940-3177",
2021-03-23 19:03:37 +01:00
"0000-0001-8096-4333",
2021-03-23 09:47:47 +01:00
"0000-0002-6680-1703",
"0000-0002-5946-1595",
"0000-0002-8593-9257",
"0000-0002-5196-4905",
2021-03-23 19:03:37 +01:00
"0000-0002-7653-4899",
"0000-0003-1904-4188",
2021-03-23 09:47:47 +01:00
"0000-0001-6921-0426",
2021-03-23 19:03:37 +01:00
"0000-0001-8808-4867",
2021-03-23 09:47:47 +01:00
"0000-0003-1815-1993",
"0000-0001-8644-2114",
2021-03-23 19:03:37 +01:00
"0000-0003-1675-2840",
"0000-0002-7843-8497",
"0000-0001-7784-0583",
2021-03-23 09:47:47 +01:00
"0000-0001-8986-2528",
"0000-0002-5265-6074",
"0000-0001-7550-5802",
2021-03-23 19:03:37 +01:00
"0000-0003-0907-9870",
2021-03-23 09:47:47 +01:00
"0000-0002-0696-8560",
2021-03-23 19:03:37 +01:00
"0000-0002-3334-9386",
2021-03-23 09:47:47 +01:00
"0000-0002-7179-6953",
2021-03-23 19:03:37 +01:00
"0000-0001-6979-4273",
"0000-0001-9102-8639",
2021-03-23 09:47:47 +01:00
"0000-0002-8797-6502",
"0000-0001-9119-5955",
2021-03-23 19:03:37 +01:00
"0000-0001-7608-9433",
"0000-0002-5985-9114"
2021-03-18 17:43:00 +01:00
],
"y": [
2021-03-23 09:47:47 +01:00
219,
174,
169,
152,
114,
114,
111,
104,
2021-03-23 19:03:37 +01:00
104,
2021-03-23 09:47:47 +01:00
90,
83,
83,
81,
81,
80,
80,
80,
76,
73,
72,
71,
70,
69,
69,
68,
68,
68,
68,
67,
67,
66,
66,
65,
64,
61,
61,
2021-03-23 19:03:37 +01:00
61,
2021-03-23 09:47:47 +01:00
59,
57,
57,
57,
57,
57,
57,
57,
56,
55,
55,
55,
55,
50,
50,
50,
49,
49,
48,
48,
48,
48,
2021-03-23 19:03:37 +01:00
48,
2021-03-23 09:47:47 +01:00
47,
47,
46,
46,
2021-03-23 19:03:37 +01:00
46,
2021-03-23 09:47:47 +01:00
45,
45,
45,
45,
44,
43,
43,
43,
43,
42,
42,
42,
41,
41,
2021-03-23 19:03:37 +01:00
41,
2021-03-23 09:47:47 +01:00
40,
40,
39,
39,
39,
39,
38,
38,
38,
38,
38,
37,
37,
37,
37,
37,
36,
36,
36,
36
2021-03-22 19:08:20 +01:00
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
2021-03-18 17:43:00 +01:00
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
2021-03-23 09:47:47 +01:00
"text": "Top 100 ORCID with URLs"
2021-03-18 17:43:00 +01:00
},
"xaxis": {
2021-03-23 09:35:35 +01:00
"range": [
-0.5,
2021-03-23 09:47:47 +01:00
99.5
2021-03-23 09:35:35 +01:00
],
2021-03-18 17:43:00 +01:00
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"bedc1d58-293d-4a5b-99a0-6e6d8bda10c0\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"bedc1d58-293d-4a5b-99a0-6e6d8bda10c0\")) { Plotly.newPlot( \"bedc1d58-293d-4a5b-99a0-6e6d8bda10c0\", [{\"type\": \"bar\", \"x\": [\"0000-0002-1234-835x\", \"0000-0001-7478-4539\", \"0000-0002-7392-3792\", \"0000-0002-6938-9638\", \"0000-0002-5710-4041\", \"0000-0003-2450-090x\", \"0000-0002-3920-7389\", \"0000-0001-5384-0001\", \"0000-0002-6689-4129\", \"0000-0002-4621-5571\", \"0000-0002-7754-8889\", \"0000-0001-9131-1266\", \"0000-0002-5250-1144\", \"0000-0002-9025-8632\", \"0000-0002-7456-3848\", \"0000-0003-0176-1293\", \"0000-0003-0321-7339\", \"0000-0002-8493-0402\", \"0000-0002-9965-2425\", \"0000-0001-8873-6677\", \"0000-0002-3997-5070\", \"0000-0002-1856-6905\", \"0000-0002-4316-1467\", \"0000-0002-4062-3603\", \"0000-0001-5880-7091\", \"0000-0003-0594-2462\", \"0000-0003-1524-6268\", \"0000-0002-0752-7513\", \"0000-0003-2593-7134\", \"0000-0002-1298-5252\", \"0000-0003-2383-8386\", \"0000-0003-1761-3842\", \"0000-0003-3546-2312\", \"0000-0002-2886-9248\", \"0000-0003-4948-9268\", \"0000-0003-2183-8112\", \"0000-0002-1929-6054\", \"0000-0003-2407-3557\", \"0000-0001-7133-6896\", \"0000-0002-9276-6921\", \"0000-0002-4305-4215\", \"0000-0003-1484-6958\", \"0000-0002-7568-3403\", \"0000-0002-4004-6666\", \"0000-0003-0796-0234\", \"0000-0002-8208-0897\", \"0000-0003-4993-5555\", \"0000-0002-8116-9611\", \"0000-0003-0930-6121\", \"0000-0002-9071-5450\", \"0000-0002-8122-879x\", \"0000-0002-3277-9659\", \"0000-0001-9559-1103\", \"0000-0003-2862-6315\", \"0000-0002-2000-8339\", \"0000-0001-5300-4601\", \"0000-0002-6547-0172\", \"0000-0003-4808-6619\", \"0000-0002-5139-2660\", \"0000-0002-6254-8683\", \"0000-0002-0971-9375\", \"0000-0003-3933-0229\", \"0000-0003-1585-1134\", 
\"0000-0003-0694-1154\", \"0000-0002-4659-5391\", \"0000-0001-6461-2573\", \"0000-0001-6783-2037\", \"0000-0003-4501-3756\", \"0000-0002-2916-2893\", \"0000-0001-5549-6822\", \"0000-0003-4326-9336\", \"0000-0001-8978-4830\", \"0000-0002-8940-3177\", \"0000-0001-8096-4333\", \"0000-0002-6680-1703\", \"0000-0002-5946-1595\", \"0000-0002-8593-9257\", \"0000-0002-5196-4905\", \"0000-0002-7653-4899\", \"0000-0003-1904-4188\", \"0000-0001-6921-0426\", \"0000-0001-8808-4867\", \"0000-0003-1815-1993\", \"0000-0001-8644-2114\", \"0000-0003-1675-2840\", \"0000-0002-7843-8497\", \"0000-0001-7784-0583\", \"0000-0001-8986-2528\", \"0000-0002-5265-6074\", \"0000-0001-7550-5802\", \"0000-0003-0907-9870\", \"0000-0002-0696-8560\", \"0000-0002-3334-9386\", \"0000-0002-7179-6953\", \"0000-0001-6979-4273\", \"0000-0001-9102-8639\", \"0000-0002-8797-6502\", \"0000-0001-9119-5955\", \"0000-0001-7608-9433\", \"0000-0002-5985-9114\"], \"y\": [219.0, 174.0, 169.0, 152.0, 114.0, 114.0, 111.0, 104.0, 104.0, 90.0, 83.0, 83.0, 81.0, 81.0, 80.0, 80.0, 80.0, 76.0, 73.0, 72.0, 71.0, 70.0, 69.0, 69.0, 68.0, 68.0, 68.0, 68.0, 67.0, 67.0, 66.0, 66.0, 65.0, 64.0, 61.0, 61.0, 61.0, 59.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 57.0, 56.0, 55.0, 55.0, 55.0, 55.0, 50.0, 50.0, 50.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 45.0, 45.0, 45.0, 44.0, 43.0, 43.0, 43.0, 43.0, 42.0, 42.0, 42.0, 41.0, 41.0, 41.0, 40.0, 40.0, 39.0, 39.0, 39.0, 39.0, 38.0, 38.0, 38.0, 38.0, 38.0, 37.0, 37.0, 37.0, 37.0, 37.0, 36.0, 36.0, 36.0, 36.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"
2021-03-18 17:43:00 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('bedc1d58-293d-4a5b-99a0-6e6d8bda10c0');\n",
2021-03-18 17:43:00 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2021-03-23 09:47:47 +01:00
"# Bar chart of the TOP_N profiles with the largest number of URLs (n_urls).\n",
"set_top_n(100)\n",
"\n",
"# Sort once, on just the two plotted columns, and reuse the slice for both axes.\n",
"# The previous version sorted the whole frame twice (once for x, once for y).\n",
"top_by_n_urls = df[['orcid', 'n_urls']]\\\n",
"    .sort_values(by=['n_urls'], ascending=False)\\\n",
"    .head(TOP_N)\n",
"\n",
"data = [\n",
"    go.Bar(\n",
"        x=top_by_n_urls['orcid'],\n",
"        y=top_by_n_urls['n_urls']\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Top %s ORCID with URLs' % TOP_N,\n",
"    # TOP_RANGE is set by set_top_n() so the x axis shows exactly TOP_N bars\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 43,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [],
"source": [
"# Number of profile rows per URL domain, most frequent first.\n",
"# explode() yields one row per list element, so a domain that a profile lists\n",
"# several times is counted once per occurrence.\n",
"# NOTE(review): the saved top-30 chart contains an empty-string domain;\n",
"# consider filtering '' out before plotting.\n",
"grouped_urls = (\n",
"    df[['orcid', 'url_domains']]\n",
"    .explode('url_domains')\n",
"    .reset_index(drop=True)\n",
"    .groupby('url_domains')\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 44,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-23 09:35:35 +01:00
"outputs": [
{
"data": {
2021-03-23 09:47:47 +01:00
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"linkedin.com",
"researchgate.net",
"google.com",
"cnpq.br",
"academia.edu",
"twitter.com",
"facebook.com",
"publons.com",
"wordpress.com",
"mendeley.com",
"instagram.com",
"github.io",
"google.com.ua",
"blogspot.com",
"github.com",
2021-03-23 19:03:37 +01:00
"google.es",
2021-03-23 09:47:47 +01:00
"helsinki.fi",
"unirioja.es",
"youtube.com",
"wixsite.com",
"ku.dk",
"scopus.com",
"",
"weebly.com",
"us.es",
"kth.se",
"cityu.edu.hk",
"kcl.ac.uk",
"au.dk",
2021-03-23 19:03:37 +01:00
"man.ac.uk"
2021-03-23 09:47:47 +01:00
],
"y": [
2021-03-23 19:03:37 +01:00
77558,
67357,
44397,
24439,
21054,
18771,
15121,
10622,
8996,
6978,
5881,
5479,
5335,
5240,
5199,
5134,
4711,
4572,
4396,
4120,
3756,
3558,
3494,
3115,
3034,
2952,
2793,
2720,
2717,
2693
2021-03-23 09:47:47 +01:00
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Top 30 URL domains"
},
"xaxis": {
"range": [
-0.5,
29.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"eb52759a-1585-4b28-996e-061364c1b3c7\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"eb52759a-1585-4b28-996e-061364c1b3c7\")) { Plotly.newPlot( \"eb52759a-1585-4b28-996e-061364c1b3c7\", [{\"type\": \"bar\", \"x\": [\"linkedin.com\", \"researchgate.net\", \"google.com\", \"cnpq.br\", \"academia.edu\", \"twitter.com\", \"facebook.com\", \"publons.com\", \"wordpress.com\", \"mendeley.com\", \"instagram.com\", \"github.io\", \"google.com.ua\", \"blogspot.com\", \"github.com\", \"google.es\", \"helsinki.fi\", \"unirioja.es\", \"youtube.com\", \"wixsite.com\", \"ku.dk\", \"scopus.com\", \"\", \"weebly.com\", \"us.es\", \"kth.se\", \"cityu.edu.hk\", \"kcl.ac.uk\", \"au.dk\", \"man.ac.uk\"], \"y\": [77558, 67357, 44397, 24439, 21054, 18771, 15121, 10622, 8996, 6978, 5881, 5479, 5335, 5240, 5199, 5134, 4711, 4572, 4396, 4120, 3756, 3558, 3494, 3115, 3034, 2952, 2793, 2720, 2717, 2693]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], 
[0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"histogram2d\"}], \"histogram2dcontour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"color
2021-03-23 09:47:47 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('eb52759a-1585-4b28-996e-061364c1b3c7');\n",
2021-03-23 09:47:47 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
2021-03-23 19:03:37 +01:00
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the TOP_N most frequent URL domains.\n",
"set_top_n(30)\n",
"\n",
"# Sort once and reuse the result for both axes; the previous version\n",
"# re-sorted grouped_urls separately for x and for y.\n",
"top_domains = grouped_urls.sort_values(by=['orcid'], ascending=False).head(TOP_N)\n",
"\n",
"data = [\n",
"    go.Bar(\n",
"        x=top_domains.index,\n",
"        y=top_domains['orcid']\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Top %s URL domains' % TOP_N,\n",
"    # TOP_RANGE is set by set_top_n() so the x axis shows exactly TOP_N bars\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>382497</th>\n",
" <td>0000-0002-9025-8632</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>buycannabis</td>\n",
" <td>dispensary</td>\n",
" <td>we procure and deliver premium cannabis strain...</td>\n",
" <td>[we procure and deliver premium cannabis strai...</td>\n",
" <td>[[find your cannabis &amp; marijuana dispensary , ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[goowonderland.com, goowonderland.com, goowond...</td>\n",
" <td>81.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>911811</th>\n",
" <td>0000-0002-4062-3603</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>juan de dios</td>\n",
" <td>beltrán mancilla</td>\n",
" <td>juan de dios beltrán mancilla (*) filósofo aut...</td>\n",
" <td>[juan de dios beltrán mancilla, filósofo autod...</td>\n",
" <td>[[01.- juan de dios beltrán mancilla. teoría o...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[yumpu.com, ijopm.org, google.com, blogspot.co...</td>\n",
" <td>69.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1136129</th>\n",
" <td>0000-0002-1929-6054</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>franklin américo</td>\n",
" <td>canaza choque</td>\n",
" <td>docente-investigador social. maestrando en der...</td>\n",
" <td>[franklin américo canaza-choque , franklin a. ...</td>\n",
" <td>[[consejo nacional de ciencia, tecnología e in...</td>\n",
" <td>leo_123fa@hotmail.com</td>\n",
" <td>...</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" <td>hotmail.com</td>\n",
" <td>[gmail.com, gmail.com, hotmail.com, baldwin.ed...</td>\n",
" <td>5.0</td>\n",
" <td>[concytec.gob.pe, redalyc.org, redalyc.org, un...</td>\n",
" <td>61.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3102686</th>\n",
" <td>0000-0003-2593-7134</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>aan</td>\n",
" <td>jaelani</td>\n",
" <td>all my papers can be downloaded from portal:re...</td>\n",
" <td>[jaelani, a., jaelani, aan]</td>\n",
" <td>[[microsoft academic research, https://academi...</td>\n",
" <td>aan_jaelani@syekhnurjati.ac.id</td>\n",
" <td>...</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>193</td>\n",
" <td>1</td>\n",
" <td>syekhnurjati.ac.id</td>\n",
" <td>[gmail.com]</td>\n",
" <td>1.0</td>\n",
" <td>[microsoft.com, twitter.com, academia.edu, aca...</td>\n",
" <td>67.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6868932</th>\n",
" <td>0000-0002-5710-4041</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>ryszard</td>\n",
" <td>romaniuk</td>\n",
" <td>professor of electronics and communications en...</td>\n",
" <td>[r.romaniuk, r.s.romaniuk, ryszard romaniuk, r...</td>\n",
" <td>[[scholar google, http://scholar.google.pl/cit...</td>\n",
" <td>rrom@ise.pw.edu.pl</td>\n",
" <td>...</td>\n",
" <td>1221</td>\n",
" <td>25</td>\n",
" <td>0</td>\n",
" <td>1742</td>\n",
" <td>1</td>\n",
" <td>ise.pw.edu.pl</td>\n",
" <td>[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]</td>\n",
" <td>3.0</td>\n",
" <td>[google.pl, publons.com, scopus.com, mendeley....</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8088987</th>\n",
" <td>0000-0002-9965-2425</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>jaroslaw</td>\n",
" <td>spychala</td>\n",
" <td>jaroslaw spychala has received a doctoral degr...</td>\n",
" <td>[jaroslaw jozef spychala]</td>\n",
" <td>[[resume, http://www.biowebspin.com/wp-content...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>15</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>29</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biowebspin.com, biowebspin.com, google.com, l...</td>\n",
" <td>73.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8658355</th>\n",
" <td>0000-0002-3920-7389</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>а.</td>\n",
" <td>гусев</td>\n",
" <td>surname, name gusev alexander leonidovichdate...</td>\n",
" <td>[alexander l. gusev , alexander leonidovich gu...</td>\n",
" <td>[[a.l. gusev alternative energy and ecology, ...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>37</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>21</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[youtube.com, isjaee.com, researchgate.net, re...</td>\n",
" <td>111.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8778864</th>\n",
" <td>0000-0002-3997-5070</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>dr. parameshachari</td>\n",
" <td>b d</td>\n",
" <td>dr. parameshachari b dacm distinguished speake...</td>\n",
" <td>[dr. parameshachari b d]</td>\n",
" <td>[[gsssietw,mysuru, http://geethashishu.in/], [...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>47</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>48</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[geethashishu.in, geethashishu.in, acm.org, go...</td>\n",
" <td>71.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9980164</th>\n",
" <td>0000-0003-4948-9268</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>gustavo</td>\n",
" <td>duperré</td>\n",
" <td>gustavo norberto duperré graduated in arts and...</td>\n",
" <td>[gustavo norberto duperré, duperré, g. n., gus...</td>\n",
" <td>[[gis in cultural heritage - icomos românia, h...</td>\n",
" <td>gustavo.duperre@usal.edu.ar</td>\n",
" <td>...</td>\n",
" <td>13</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>34</td>\n",
" <td>0</td>\n",
" <td>usal.edu.ar</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[icomos.ro, unirioja.es, unirioja.es, unc.edu....</td>\n",
" <td>61.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10024501</th>\n",
" <td>0000-0003-2407-3557</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>abdul</td>\n",
" <td>aziz</td>\n",
" <td>abdul aziz was born on may 25, 1973, in brebes...</td>\n",
" <td>[abdul aziz, aziz, abdul, aziz, a., aziz, abd,...</td>\n",
" <td>[[google scholar, https://scholar.google.com/c...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>19</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>77</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[google.com, syekhnurjati.ac.id, orcid.org, bl...</td>\n",
" <td>59.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10091165</th>\n",
" <td>0000-0003-2183-8112</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>pelayo munhoz</td>\n",
" <td>olea</td>\n",
" <td>pós-doutorado em gestão ambiental pela univers...</td>\n",
" <td>[ munhoz, pelayo olea, olea, pelayo, olea, p...</td>\n",
" <td>[[currículo lattes, http://lattes.cnpq.br/6209...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>797</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>582</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...</td>\n",
" <td>61.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10523205</th>\n",
" <td>0000-0003-2450-090x</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>eduard</td>\n",
" <td>babulak</td>\n",
" <td>professor eduard babulak is accomplished inter...</td>\n",
" <td>[professor eduard babulak]</td>\n",
" <td>[[honorary chair, chief mentor &amp; senior adviso...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>199</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>174</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[worldassessmentcouncil.org, spseke.sk, bcs.or...</td>\n",
" <td>114.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10696059</th>\n",
" <td>0000-0002-6938-9638</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>adolfo</td>\n",
" <td>catral sanabria</td>\n",
" <td>my education is in computer science, mathemati...</td>\n",
" <td>NaN</td>\n",
" <td>[[researchgate adolfo catral , https://www.res...</td>\n",
" <td>NaN</td>\n",
" <td>...</td>\n",
" <td>2022</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[researchgate.net, youtube.com, linkedin.com, ...</td>\n",
" <td>152.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>13 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
"382497 0000-0002-9025-8632 1 1 \n",
"911811 0000-0002-4062-3603 1 1 \n",
"1136129 0000-0002-1929-6054 1 1 \n",
"3102686 0000-0003-2593-7134 1 1 \n",
"6868932 0000-0002-5710-4041 1 1 \n",
"8088987 0000-0002-9965-2425 1 1 \n",
"8658355 0000-0002-3920-7389 1 1 \n",
"8778864 0000-0002-3997-5070 1 1 \n",
"9980164 0000-0003-4948-9268 1 1 \n",
"10024501 0000-0003-2407-3557 1 1 \n",
"10091165 0000-0003-2183-8112 1 1 \n",
"10523205 0000-0003-2450-090x 1 1 \n",
"10696059 0000-0002-6938-9638 1 1 \n",
"\n",
" verified_primary_email given_names family_name \\\n",
"382497 1 buycannabis dispensary \n",
"911811 1 juan de dios beltrán mancilla \n",
"1136129 1 franklin américo canaza choque \n",
"3102686 1 aan jaelani \n",
"6868932 1 ryszard romaniuk \n",
"8088987 1 jaroslaw spychala \n",
"8658355 1 а. гусев \n",
"8778864 1 dr. parameshachari b d \n",
"9980164 1 gustavo duperré \n",
"10024501 1 abdul aziz \n",
"10091165 1 pelayo munhoz olea \n",
"10523205 1 eduard babulak \n",
"10696059 1 adolfo catral sanabria \n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" biography \\\n",
"382497 we procure and deliver premium cannabis strain... \n",
"911811 juan de dios beltrán mancilla (*) filósofo aut... \n",
"1136129 docente-investigador social. maestrando en der... \n",
"3102686 all my papers can be downloaded from portal:re... \n",
"6868932 professor of electronics and communications en... \n",
"8088987 jaroslaw spychala has received a doctoral degr... \n",
"8658355 surname, name gusev alexander leonidovichdate... \n",
"8778864 dr. parameshachari b dacm distinguished speake... \n",
"9980164 gustavo norberto duperré graduated in arts and... \n",
"10024501 abdul aziz was born on may 25, 1973, in brebes... \n",
"10091165 pós-doutorado em gestão ambiental pela univers... \n",
"10523205 professor eduard babulak is accomplished inter... \n",
"10696059 my education is in computer science, mathemati... \n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" other_names \\\n",
"382497 [we procure and deliver premium cannabis strai... \n",
"911811 [juan de dios beltrán mancilla, filósofo autod... \n",
"1136129 [franklin américo canaza-choque , franklin a. ... \n",
"3102686 [jaelani, a., jaelani, aan] \n",
"6868932 [r.romaniuk, r.s.romaniuk, ryszard romaniuk, r... \n",
"8088987 [jaroslaw jozef spychala] \n",
"8658355 [alexander l. gusev , alexander leonidovich gu... \n",
"8778864 [dr. parameshachari b d] \n",
"9980164 [gustavo norberto duperré, duperré, g. n., gus... \n",
"10024501 [abdul aziz, aziz, abdul, aziz, a., aziz, abd,... \n",
"10091165 [ munhoz, pelayo olea, olea, pelayo, olea, p... \n",
"10523205 [professor eduard babulak] \n",
"10696059 NaN \n",
"\n",
" urls \\\n",
"382497 [[find your cannabis & marijuana dispensary , ... \n",
"911811 [[01.- juan de dios beltrán mancilla. teoría o... \n",
"1136129 [[consejo nacional de ciencia, tecnología e in... \n",
"3102686 [[microsoft academic research, https://academi... \n",
"6868932 [[scholar google, http://scholar.google.pl/cit... \n",
"8088987 [[resume, http://www.biowebspin.com/wp-content... \n",
"8658355 [[a.l. gusev alternative energy and ecology, ... \n",
"8778864 [[gsssietw,mysuru, http://geethashishu.in/], [... \n",
"9980164 [[gis in cultural heritage - icomos românia, h... \n",
"10024501 [[google scholar, https://scholar.google.com/c... \n",
"10091165 [[currículo lattes, http://lattes.cnpq.br/6209... \n",
"10523205 [[honorary chair, chief mentor & senior adviso... \n",
"10696059 [[researchgate adolfo catral , https://www.res... \n",
"\n",
" primary_email ... n_doi n_arxiv n_pmc \\\n",
"382497 NaN ... 0 0 0 \n",
"911811 NaN ... 0 0 0 \n",
"1136129 leo_123fa@hotmail.com ... 29 0 0 \n",
"3102686 aan_jaelani@syekhnurjati.ac.id ... 88 0 0 \n",
"6868932 rrom@ise.pw.edu.pl ... 1221 25 0 \n",
"8088987 NaN ... 15 0 0 \n",
"8658355 NaN ... 37 0 0 \n",
"8778864 NaN ... 47 0 0 \n",
"9980164 gustavo.duperre@usal.edu.ar ... 13 0 0 \n",
"10024501 NaN ... 19 0 0 \n",
"10091165 NaN ... 797 0 1 \n",
"10523205 NaN ... 199 0 1 \n",
"10696059 NaN ... 2022 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"382497 0 0 NaN \n",
"911811 7 0 NaN \n",
"1136129 33 1 hotmail.com \n",
"3102686 193 1 syekhnurjati.ac.id \n",
"6868932 1742 1 ise.pw.edu.pl \n",
"8088987 29 1 NaN \n",
"8658355 21 1 NaN \n",
"8778864 48 1 NaN \n",
"9980164 34 0 usal.edu.ar \n",
"10024501 77 1 NaN \n",
"10091165 582 1 NaN \n",
"10523205 174 1 NaN \n",
"10696059 16 1 NaN \n",
"\n",
" other_email_domains n_emails \\\n",
"382497 NaN NaN \n",
"911811 NaN NaN \n",
"1136129 [gmail.com, gmail.com, hotmail.com, baldwin.ed... 5.0 \n",
"3102686 [gmail.com] 1.0 \n",
"6868932 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] 3.0 \n",
"8088987 NaN NaN \n",
"8658355 NaN NaN \n",
"8778864 NaN NaN \n",
"9980164 NaN NaN \n",
"10024501 NaN NaN \n",
"10091165 NaN NaN \n",
"10523205 NaN NaN \n",
"10696059 NaN NaN \n",
"\n",
" url_domains n_urls \n",
"382497 [goowonderland.com, goowonderland.com, goowond... 81.0 \n",
"911811 [yumpu.com, ijopm.org, google.com, blogspot.co... 69.0 \n",
"1136129 [concytec.gob.pe, redalyc.org, redalyc.org, un... 61.0 \n",
"3102686 [microsoft.com, twitter.com, academia.edu, aca... 67.0 \n",
"6868932 [google.pl, publons.com, scopus.com, mendeley.... 114.0 \n",
"8088987 [biowebspin.com, biowebspin.com, google.com, l... 73.0 \n",
"8658355 [youtube.com, isjaee.com, researchgate.net, re... 111.0 \n",
"8778864 [geethashishu.in, geethashishu.in, acm.org, go... 71.0 \n",
"9980164 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... 61.0 \n",
"10024501 [google.com, syekhnurjati.ac.id, orcid.org, bl... 59.0 \n",
"10091165 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... 61.0 \n",
"10523205 [worldassessmentcouncil.org, spseke.sk, bcs.or... 114.0 \n",
"10696059 [researchgate.net, youtube.com, linkedin.com, ... 152.0 \n",
"\n",
"[13 rows x 29 columns]"
2021-03-23 09:47:47 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 45,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-23 19:03:37 +01:00
"output_type": "execute_result"
2021-03-23 09:47:47 +01:00
}
2021-03-23 19:03:37 +01:00
],
"source": [
"# Rows whose `url_domains` value has length > 50 and whose `n_works` is > 0.\n",
"df.loc[\n",
"    df['url_domains'].str.len().gt(50)\n",
"    & df['n_works'].gt(0)\n",
"]"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 46,
2021-03-23 09:47:47 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
2021-03-23 19:03:37 +01:00
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-23 09:47:47 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>97666</th>\n",
" <td>0000-0002-7843-8497</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>davi</td>\n",
" <td>barbosa</td>\n",
" <td>pesquisador na área sociojurídica, professor, ...</td>\n",
" <td>[professor davi barbosa delmont]</td>\n",
" <td>[[plataforma de cursos ideia criativa, https:/...</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>NaN</td>\n",
" <td>[eadplataforma.com, facebook.com, youtube.com,...</td>\n",
" <td>39.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>200670</th>\n",
" <td>0000-0003-1554-1531</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>katarzyna</td>\n",
" <td>ochman</td>\n",
" <td>katarzyna ochman [kataˈʐɨna ˈɔxman] is assista...</td>\n",
" <td>[[kataˈʐɨna ˈɔxman], catharina ochman, cathari...</td>\n",
" <td>[[researchgate, https://www.researchgate.net/p...</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[researchgate.net, academia.edu, facebook.com,...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>210325</th>\n",
" <td>0000-0003-3080-4643</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>graham</td>\n",
" <td>dawson</td>\n",
" <td>science and engineering faculty (sef) libraria...</td>\n",
" <td>[ graham colin dawson, g.c. dawson]</td>\n",
" <td>[[qut home page, https://www.library.qut.edu.a...</td>\n",
" <td>g.dawson@qut.edu.au</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>qut.edu.au</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[qut.edu.au, qut.edu.au, google.com.au, resear...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>218947</th>\n",
" <td>0000-0003-3193-030x</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>juan pablo</td>\n",
" <td>wolff mejia</td>\n",
" <td>aspirante a maestría en derecho y negocios int...</td>\n",
" <td>[juan pablo wolff, pablo wolff mejia, juan p. ...</td>\n",
" <td>[[twitter, https://twitter.com/pablomejiam], [...</td>\n",
" <td>juanpmejia@ulasallista.edu.co</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>ulasallista.edu.co</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[twitter.com, youtube.com, google.com, linkedi...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>261974</th>\n",
" <td>0000-0002-5341-6531</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>trent</td>\n",
" <td>hammond</td>\n",
" <td>mr trent hammond is an honorary research fello...</td>\n",
" <td>[trent ernest hammond (t.e.hammond)]</td>\n",
" <td>[[academic support masters, http://trenthammon...</td>\n",
" <td>trent.hammond@academicsupportmasters.com.au</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>academicsupportmasters.com.au</td>\n",
" <td>[health.nsw.gov.au, csu.edu.au, sociologist.co...</td>\n",
" <td>5.0</td>\n",
" <td>[wix.com, academia.edu, researchgate.net, rese...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10405738</th>\n",
" <td>0000-0002-3374-5709</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>guillermo</td>\n",
" <td>ortiz</td>\n",
" <td>médico, internista, neumólogo, intensivista, e...</td>\n",
" <td>[guillermo ortiz-ruiz]</td>\n",
" <td>[[elsevier, https://www.elsevier.com/], [asoci...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>62</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[elsevier.com, amci.org.co, springer.com, revi...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10472264</th>\n",
" <td>0000-0001-7228-5680</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>text</td>\n",
" <td>protocol</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[[about, https://about.me/textprotocol], [gith...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[about.me, github.com, gitlab.com, gravatar.co...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10785961</th>\n",
" <td>0000-0002-3064-0194</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>leonardo fernando</td>\n",
" <td>cruz basso</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[papers-1, https://www.researchgate.net/profi...</td>\n",
" <td>leonardofernando.basso@mackenzie.br</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>mackenzie.br</td>\n",
" <td>[mackenzie.br]</td>\n",
" <td>1.0</td>\n",
" <td>[researchgate.net, ssrn.com, cnpq.br, google.c...</td>\n",
" <td>17.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10845645</th>\n",
" <td>0000-0003-1047-4229</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>bayu</td>\n",
" <td>sakti</td>\n",
" <td>bayu purbha saktisaya adalah bayu purbha sakti...</td>\n",
" <td>[bayu purbha sakti]</td>\n",
" <td>[[osf, http://osf.io/qe2ug], [inarxiv, https:/...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[osf.io, osf.io, academia.edu, mendeley.com, f...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10896059</th>\n",
" <td>0000-0003-4836-7074</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>karla haydee</td>\n",
" <td>ortiz palafox</td>\n",
" <td>karla haydee ortíz palafoxmiembro del sistema ...</td>\n",
" <td>[karla palafox]</td>\n",
" <td>[[opinión día del maestro, http://www.cronicaj...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[cronicajalisco.com, youtube.com, tlaquepaque....</td>\n",
" <td>22.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>141 rows × 29 columns</p>\n",
2021-03-23 09:47:47 +01:00
"</div>"
],
"text/plain": [
" orcid claimed verified_email \\\n",
2021-03-23 19:03:37 +01:00
"97666 0000-0002-7843-8497 1 1 \n",
"200670 0000-0003-1554-1531 1 1 \n",
"210325 0000-0003-3080-4643 1 1 \n",
"218947 0000-0003-3193-030x 1 1 \n",
"261974 0000-0002-5341-6531 1 1 \n",
"... ... ... ... \n",
"10405738 0000-0002-3374-5709 1 1 \n",
"10472264 0000-0001-7228-5680 1 1 \n",
"10785961 0000-0002-3064-0194 1 1 \n",
"10845645 0000-0003-1047-4229 1 1 \n",
"10896059 0000-0003-4836-7074 1 1 \n",
"\n",
" verified_primary_email given_names family_name \\\n",
"97666 1 davi barbosa \n",
"200670 1 katarzyna ochman \n",
"210325 1 graham dawson \n",
"218947 1 juan pablo wolff mejia \n",
"261974 1 trent hammond \n",
"... ... ... ... \n",
"10405738 1 guillermo ortiz \n",
"10472264 1 text protocol \n",
"10785961 1 leonardo fernando cruz basso \n",
"10845645 1 bayu sakti \n",
"10896059 1 karla haydee ortiz palafox \n",
2021-03-23 09:47:47 +01:00
"\n",
" biography \\\n",
2021-03-23 19:03:37 +01:00
"97666 pesquisador na área sociojurídica, professor, ... \n",
"200670 katarzyna ochman [kataˈʐɨna ˈɔxman] is assista... \n",
"210325 science and engineering faculty (sef) libraria... \n",
"218947 aspirante a maestría en derecho y negocios int... \n",
"261974 mr trent hammond is an honorary research fello... \n",
"... ... \n",
"10405738 médico, internista, neumólogo, intensivista, e... \n",
"10472264 NaN \n",
"10785961 NaN \n",
"10845645 bayu purbha saktisaya adalah bayu purbha sakti... \n",
"10896059 karla haydee ortíz palafoxmiembro del sistema ... \n",
2021-03-23 09:47:47 +01:00
"\n",
" other_names \\\n",
2021-03-23 19:03:37 +01:00
"97666 [professor davi barbosa delmont] \n",
"200670 [[kataˈʐɨna ˈɔxman], catharina ochman, cathari... \n",
"210325 [ graham colin dawson, g.c. dawson] \n",
"218947 [juan pablo wolff, pablo wolff mejia, juan p. ... \n",
"261974 [trent ernest hammond (t.e.hammond)] \n",
"... ... \n",
"10405738 [guillermo ortiz-ruiz] \n",
"10472264 NaN \n",
"10785961 NaN \n",
"10845645 [bayu purbha sakti] \n",
"10896059 [karla palafox] \n",
2021-03-23 09:47:47 +01:00
"\n",
" urls \\\n",
2021-03-23 19:03:37 +01:00
"97666 [[plataforma de cursos ideia criativa, https:/... \n",
"200670 [[researchgate, https://www.researchgate.net/p... \n",
"210325 [[qut home page, https://www.library.qut.edu.a... \n",
"218947 [[twitter, https://twitter.com/pablomejiam], [... \n",
"261974 [[academic support masters, http://trenthammon... \n",
"... ... \n",
"10405738 [[elsevier, https://www.elsevier.com/], [asoci... \n",
"10472264 [[about, https://about.me/textprotocol], [gith... \n",
"10785961 [[papers-1, https://www.researchgate.net/profi... \n",
"10845645 [[osf, http://osf.io/qe2ug], [inarxiv, https:/... \n",
"10896059 [[opinión día del maestro, http://www.cronicaj... \n",
"\n",
" primary_email ... n_doi n_arxiv \\\n",
"97666 NaN ... 0 0 \n",
"200670 NaN ... 1 0 \n",
"210325 g.dawson@qut.edu.au ... 0 0 \n",
"218947 juanpmejia@ulasallista.edu.co ... 0 0 \n",
"261974 trent.hammond@academicsupportmasters.com.au ... 1 0 \n",
"... ... ... ... ... \n",
"10405738 NaN ... 62 0 \n",
"10472264 NaN ... 0 0 \n",
"10785961 leonardofernando.basso@mackenzie.br ... 5 0 \n",
"10845645 NaN ... 0 0 \n",
"10896059 NaN ... 0 0 \n",
"\n",
" n_pmc n_other_pids label primary_email_domain \\\n",
"97666 0 0 0 NaN \n",
"200670 0 0 1 NaN \n",
"210325 0 6 1 qut.edu.au \n",
"218947 0 0 1 ulasallista.edu.co \n",
"261974 0 1 1 academicsupportmasters.com.au \n",
"... ... ... ... ... \n",
"10405738 0 88 0 NaN \n",
"10472264 0 0 0 NaN \n",
"10785961 0 0 1 mackenzie.br \n",
"10845645 0 0 1 NaN \n",
"10896059 0 2 1 NaN \n",
2021-03-23 09:47:47 +01:00
"\n",
" other_email_domains n_emails \\\n",
2021-03-23 19:03:37 +01:00
"97666 NaN NaN \n",
"200670 NaN NaN \n",
"210325 NaN NaN \n",
"218947 NaN NaN \n",
"261974 [health.nsw.gov.au, csu.edu.au, sociologist.co... 5.0 \n",
"... ... ... \n",
"10405738 NaN NaN \n",
"10472264 NaN NaN \n",
"10785961 [mackenzie.br] 1.0 \n",
"10845645 NaN NaN \n",
"10896059 NaN NaN \n",
2021-03-23 09:47:47 +01:00
"\n",
" url_domains n_urls \n",
2021-03-23 19:03:37 +01:00
"97666 [eadplataforma.com, facebook.com, youtube.com,... 39.0 \n",
"200670 [researchgate.net, academia.edu, facebook.com,... 11.0 \n",
"210325 [qut.edu.au, qut.edu.au, google.com.au, resear... 11.0 \n",
"218947 [twitter.com, youtube.com, google.com, linkedi... 11.0 \n",
"261974 [wix.com, academia.edu, researchgate.net, rese... 12.0 \n",
"... ... ... \n",
"10405738 [elsevier.com, amci.org.co, springer.com, revi... 12.0 \n",
"10472264 [about.me, github.com, gitlab.com, gravatar.co... 12.0 \n",
"10785961 [researchgate.net, ssrn.com, cnpq.br, google.c... 17.0 \n",
"10845645 [osf.io, osf.io, academia.edu, mendeley.com, f... 12.0 \n",
"10896059 [cronicajalisco.com, youtube.com, tlaquepaque.... 22.0 \n",
"\n",
"[141 rows x 29 columns]"
2021-03-23 09:47:47 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 46,
2021-03-23 09:47:47 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"# Rows whose `url_domains` value has length > 10, whose `n_works` is > 0,\n",
"# and whose `works_source` value has length exactly 1.\n",
"df.loc[\n",
"    df['url_domains'].str.len().gt(10)\n",
"    & df['n_works'].gt(0)\n",
"    & df['works_source'].str.len().eq(1)\n",
"]"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 47,
2021-03-23 09:47:47 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
2021-03-23 19:03:37 +01:00
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-23 09:47:47 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>0</th>\n",
" <td>0000-0002-7843-8497</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>davi</td>\n",
" <td>barbosa</td>\n",
" <td>pesquisador na área sociojurídica, professor, ...</td>\n",
" <td>[professor davi barbosa delmont]</td>\n",
" <td>[[plataforma de cursos ideia criativa, https:/...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[eadplataforma.com, facebook.com, youtube.com,...</td>\n",
" <td>39.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>1</th>\n",
" <td>0000-0003-1554-1531</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>katarzyna</td>\n",
" <td>ochman</td>\n",
" <td>katarzyna ochman [kataˈʐɨna ˈɔxman] is assista...</td>\n",
" <td>[[kataˈʐɨna ˈɔxman], catharina ochman, cathari...</td>\n",
" <td>[[researchgate, https://www.researchgate.net/p...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[researchgate.net, academia.edu, facebook.com,...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>2</th>\n",
" <td>0000-0003-3080-4643</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>graham</td>\n",
" <td>dawson</td>\n",
" <td>science and engineering faculty (sef) libraria...</td>\n",
" <td>[ graham colin dawson, g.c. dawson]</td>\n",
" <td>[[qut home page, https://www.library.qut.edu.a...</td>\n",
" <td>g.dawson@qut.edu.au</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>qut.edu.au</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[qut.edu.au, qut.edu.au, google.com.au, resear...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>3</th>\n",
" <td>0000-0003-3193-030x</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>juan pablo</td>\n",
" <td>wolff mejia</td>\n",
" <td>aspirante a maestría en derecho y negocios int...</td>\n",
" <td>[juan pablo wolff, pablo wolff mejia, juan p. ...</td>\n",
" <td>[[twitter, https://twitter.com/pablomejiam], [...</td>\n",
" <td>juanpmejia@ulasallista.edu.co</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>ulasallista.edu.co</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[twitter.com, youtube.com, google.com, linkedi...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>4</th>\n",
" <td>0000-0002-5341-6531</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>trent</td>\n",
" <td>hammond</td>\n",
" <td>mr trent hammond is an honorary research fello...</td>\n",
" <td>[trent ernest hammond (t.e.hammond)]</td>\n",
" <td>[[academic support masters, http://trenthammon...</td>\n",
" <td>trent.hammond@academicsupportmasters.com.au</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>academicsupportmasters.com.au</td>\n",
" <td>[health.nsw.gov.au, csu.edu.au, sociologist.co...</td>\n",
" <td>5.0</td>\n",
" <td>[wix.com, academia.edu, researchgate.net, rese...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>136</th>\n",
" <td>0000-0002-3374-5709</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>guillermo</td>\n",
" <td>ortiz</td>\n",
" <td>médico, internista, neumólogo, intensivista, e...</td>\n",
" <td>[guillermo ortiz-ruiz]</td>\n",
" <td>[[elsevier, https://www.elsevier.com/], [asoci...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>62</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[elsevier.com, amci.org.co, springer.com, revi...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>137</th>\n",
" <td>0000-0001-7228-5680</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>text</td>\n",
" <td>protocol</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[about, https://about.me/textprotocol], [gith...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[about.me, github.com, gitlab.com, gravatar.co...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>138</th>\n",
" <td>0000-0002-3064-0194</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>leonardo fernando</td>\n",
" <td>cruz basso</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[papers-1, https://www.researchgate.net/profi...</td>\n",
" <td>leonardofernando.basso@mackenzie.br</td>\n",
2021-03-23 09:47:47 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>5</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>mackenzie.br</td>\n",
" <td>[mackenzie.br]</td>\n",
" <td>1.0</td>\n",
" <td>[researchgate.net, ssrn.com, cnpq.br, google.c...</td>\n",
" <td>17.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>139</th>\n",
" <td>0000-0003-1047-4229</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>bayu</td>\n",
" <td>sakti</td>\n",
" <td>bayu purbha saktisaya adalah bayu purbha sakti...</td>\n",
" <td>[bayu purbha sakti]</td>\n",
" <td>[[osf, http://osf.io/qe2ug], [inarxiv, https:/...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[osf.io, osf.io, academia.edu, mendeley.com, f...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>140</th>\n",
" <td>0000-0003-4836-7074</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>karla haydee</td>\n",
" <td>ortiz palafox</td>\n",
" <td>karla haydee ortíz palafoxmiembro del sistema ...</td>\n",
" <td>[karla palafox]</td>\n",
" <td>[[opinión día del maestro, http://www.cronicaj...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[cronicajalisco.com, youtube.com, tlaquepaque....</td>\n",
" <td>22.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>141 rows × 29 columns</p>\n",
2021-03-23 09:47:47 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid claimed verified_email verified_primary_email \\\n",
"0 0000-0002-7843-8497 1 1 1 \n",
"1 0000-0003-1554-1531 1 1 1 \n",
"2 0000-0003-3080-4643 1 1 1 \n",
"3 0000-0003-3193-030x 1 1 1 \n",
"4 0000-0002-5341-6531 1 1 1 \n",
".. ... ... ... ... \n",
"136 0000-0002-3374-5709 1 1 1 \n",
"137 0000-0001-7228-5680 1 1 1 \n",
"138 0000-0002-3064-0194 1 1 1 \n",
"139 0000-0003-1047-4229 1 1 1 \n",
"140 0000-0003-4836-7074 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0 davi barbosa \n",
"1 katarzyna ochman \n",
"2 graham dawson \n",
"3 juan pablo wolff mejia \n",
"4 trent hammond \n",
".. ... ... \n",
"136 guillermo ortiz \n",
"137 text protocol \n",
"138 leonardo fernando cruz basso \n",
"139 bayu sakti \n",
"140 karla haydee ortiz palafox \n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" biography \\\n",
"0 pesquisador na área sociojurídica, professor, ... \n",
"1 katarzyna ochman [kataˈʐɨna ˈɔxman] is assista... \n",
"2 science and engineering faculty (sef) libraria... \n",
"3 aspirante a maestría en derecho y negocios int... \n",
"4 mr trent hammond is an honorary research fello... \n",
".. ... \n",
"136 médico, internista, neumólogo, intensivista, e... \n",
"137 NaN \n",
"138 NaN \n",
"139 bayu purbha saktisaya adalah bayu purbha sakti... \n",
"140 karla haydee ortíz palafoxmiembro del sistema ... \n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" other_names \\\n",
"0 [professor davi barbosa delmont] \n",
"1 [[kataˈʐɨna ˈɔxman], catharina ochman, cathari... \n",
"2 [ graham colin dawson, g.c. dawson] \n",
"3 [juan pablo wolff, pablo wolff mejia, juan p. ... \n",
"4 [trent ernest hammond (t.e.hammond)] \n",
".. ... \n",
"136 [guillermo ortiz-ruiz] \n",
"137 NaN \n",
"138 NaN \n",
"139 [bayu purbha sakti] \n",
"140 [karla palafox] \n",
"\n",
" urls \\\n",
"0 [[plataforma de cursos ideia criativa, https:/... \n",
"1 [[researchgate, https://www.researchgate.net/p... \n",
"2 [[qut home page, https://www.library.qut.edu.a... \n",
"3 [[twitter, https://twitter.com/pablomejiam], [... \n",
"4 [[academic support masters, http://trenthammon... \n",
".. ... \n",
"136 [[elsevier, https://www.elsevier.com/], [asoci... \n",
"137 [[about, https://about.me/textprotocol], [gith... \n",
"138 [[papers-1, https://www.researchgate.net/profi... \n",
"139 [[osf, http://osf.io/qe2ug], [inarxiv, https:/... \n",
"140 [[opinión día del maestro, http://www.cronicaj... \n",
"\n",
" primary_email ... n_doi n_arxiv n_pmc \\\n",
"0 NaN ... 0 0 0 \n",
"1 NaN ... 1 0 0 \n",
"2 g.dawson@qut.edu.au ... 0 0 0 \n",
"3 juanpmejia@ulasallista.edu.co ... 0 0 0 \n",
"4 trent.hammond@academicsupportmasters.com.au ... 1 0 0 \n",
".. ... ... ... ... ... \n",
"136 NaN ... 62 0 0 \n",
"137 NaN ... 0 0 0 \n",
"138 leonardofernando.basso@mackenzie.br ... 5 0 0 \n",
"139 NaN ... 0 0 0 \n",
"140 NaN ... 0 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"0 0 0 NaN \n",
"1 0 1 NaN \n",
"2 6 1 qut.edu.au \n",
"3 0 1 ulasallista.edu.co \n",
"4 1 1 academicsupportmasters.com.au \n",
".. ... ... ... \n",
"136 88 0 NaN \n",
"137 0 0 NaN \n",
"138 0 1 mackenzie.br \n",
"139 0 1 NaN \n",
"140 2 1 NaN \n",
"\n",
" other_email_domains n_emails \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 [health.nsw.gov.au, csu.edu.au, sociologist.co... 5.0 \n",
".. ... ... \n",
"136 NaN NaN \n",
"137 NaN NaN \n",
"138 [mackenzie.br] 1.0 \n",
"139 NaN NaN \n",
"140 NaN NaN \n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" url_domains n_urls \n",
"0 [eadplataforma.com, facebook.com, youtube.com,... 39.0 \n",
"1 [researchgate.net, academia.edu, facebook.com,... 11.0 \n",
"2 [qut.edu.au, qut.edu.au, google.com.au, resear... 11.0 \n",
"3 [twitter.com, youtube.com, google.com, linkedi... 11.0 \n",
"4 [wix.com, academia.edu, researchgate.net, rese... 12.0 \n",
".. ... ... \n",
"136 [elsevier.com, amci.org.co, springer.com, revi... 12.0 \n",
"137 [about.me, github.com, gitlab.com, gravatar.co... 12.0 \n",
"138 [researchgate.net, ssrn.com, cnpq.br, google.c... 17.0 \n",
"139 [osf.io, osf.io, academia.edu, mendeley.com, f... 12.0 \n",
"140 [cronicajalisco.com, youtube.com, tlaquepaque.... 22.0 \n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"[141 rows x 29 columns]"
2021-03-23 09:47:47 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 47,
2021-03-23 09:47:47 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"# Keep rows with a `url_domains` length > 10, `n_works` > 0 and a length-1\n",
"# `works_source`; then unpack `works_source` with explode() and renumber the\n",
"# rows from 0 so the original index is discarded.\n",
"exploded_sources = (\n",
"    df.loc[\n",
"        df['url_domains'].str.len().gt(10)\n",
"        & df['n_works'].gt(0)\n",
"        & df['works_source'].str.len().eq(1)\n",
"    ]\n",
"    .explode('works_source')\n",
"    .reset_index(drop=True)\n",
")\n",
"exploded_sources"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 48,
2021-03-23 09:47:47 +01:00
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
2021-03-23 19:03:37 +01:00
" <th>n_doi</th>\n",
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-23 09:47:47 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2021-03-23 19:03:37 +01:00
" <td>0000-0002-7843-8497</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>davi</td>\n",
" <td>barbosa</td>\n",
" <td>pesquisador na área sociojurídica, professor, ...</td>\n",
" <td>[professor davi barbosa delmont]</td>\n",
" <td>[[plataforma de cursos ideia criativa, https:/...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 09:35:35 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[eadplataforma.com, facebook.com, youtube.com,...</td>\n",
" <td>39.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 09:47:47 +01:00
" <th>1</th>\n",
2021-03-23 19:03:37 +01:00
" <td>0000-0003-1554-1531</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>katarzyna</td>\n",
" <td>ochman</td>\n",
" <td>katarzyna ochman [kataˈʐɨna ˈɔxman] is assista...</td>\n",
" <td>[[kataˈʐɨna ˈɔxman], catharina ochman, cathari...</td>\n",
" <td>[[researchgate, https://www.researchgate.net/p...</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[researchgate.net, academia.edu, facebook.com,...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>3</th>\n",
" <td>0000-0003-3193-030x</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>juan pablo</td>\n",
" <td>wolff mejia</td>\n",
" <td>aspirante a maestría en derecho y negocios int...</td>\n",
" <td>[juan pablo wolff, pablo wolff mejia, juan p. ...</td>\n",
" <td>[[twitter, https://twitter.com/pablomejiam], [...</td>\n",
" <td>juanpmejia@ulasallista.edu.co</td>\n",
2021-03-23 09:35:35 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>ulasallista.edu.co</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[twitter.com, youtube.com, google.com, linkedi...</td>\n",
" <td>11.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>4</th>\n",
" <td>0000-0002-5341-6531</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>trent</td>\n",
" <td>hammond</td>\n",
" <td>mr trent hammond is an honorary research fello...</td>\n",
" <td>[trent ernest hammond (t.e.hammond)]</td>\n",
" <td>[[academic support masters, http://trenthammon...</td>\n",
" <td>trent.hammond@academicsupportmasters.com.au</td>\n",
2021-03-23 09:35:35 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>academicsupportmasters.com.au</td>\n",
" <td>[health.nsw.gov.au, csu.edu.au, sociologist.co...</td>\n",
" <td>5.0</td>\n",
" <td>[wix.com, academia.edu, researchgate.net, rese...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>5</th>\n",
" <td>0000-0001-5295-2271</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>antoniy</td>\n",
" <td>moysey</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[academic journals database, http://journalda...</td>\n",
" <td>antoniimoisei@bsmu.edu.ua</td>\n",
2021-03-23 09:35:35 +01:00
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>bsmu.edu.ua</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[journaldatabase.info, nplu.org, acls.org, ind...</td>\n",
" <td>21.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>135</th>\n",
" <td>0000-0002-8125-0081</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>issam</td>\n",
" <td>bencheikh</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[issame1982, دكتور عصام بن الشيخ]</td>\n",
" <td>[[my blog web site, http://issame1982.blogspot...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[blogspot.com, researchgate.net, google.com, l...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>136</th>\n",
" <td>0000-0002-3374-5709</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>guillermo</td>\n",
" <td>ortiz</td>\n",
" <td>médico, internista, neumólogo, intensivista, e...</td>\n",
" <td>[guillermo ortiz-ruiz]</td>\n",
" <td>[[elsevier, https://www.elsevier.com/], [asoci...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>62</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>88</td>\n",
" <td>0</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[elsevier.com, amci.org.co, springer.com, revi...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>137</th>\n",
" <td>0000-0001-7228-5680</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>text</td>\n",
" <td>protocol</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[about, https://about.me/textprotocol], [gith...</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[about.me, github.com, gitlab.com, gravatar.co...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>139</th>\n",
" <td>0000-0003-1047-4229</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>bayu</td>\n",
" <td>sakti</td>\n",
" <td>bayu purbha saktisaya adalah bayu purbha sakti...</td>\n",
" <td>[bayu purbha sakti]</td>\n",
" <td>[[osf, http://osf.io/qe2ug], [inarxiv, https:/...</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:47:47 +01:00
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[osf.io, osf.io, academia.edu, mendeley.com, f...</td>\n",
" <td>12.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>140</th>\n",
" <td>0000-0003-4836-7074</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>karla haydee</td>\n",
" <td>ortiz palafox</td>\n",
" <td>karla haydee ortíz palafoxmiembro del sistema ...</td>\n",
" <td>[karla palafox]</td>\n",
" <td>[[opinión día del maestro, http://www.cronicaj...</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[cronicajalisco.com, youtube.com, tlaquepaque....</td>\n",
" <td>22.0</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>115 rows × 29 columns</p>\n",
2021-03-23 09:35:35 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid claimed verified_email verified_primary_email \\\n",
"0 0000-0002-7843-8497 1 1 1 \n",
"1 0000-0003-1554-1531 1 1 1 \n",
"3 0000-0003-3193-030x 1 1 1 \n",
"4 0000-0002-5341-6531 1 1 1 \n",
"5 0000-0001-5295-2271 1 1 1 \n",
".. ... ... ... ... \n",
"135 0000-0002-8125-0081 1 1 1 \n",
"136 0000-0002-3374-5709 1 1 1 \n",
"137 0000-0001-7228-5680 1 1 1 \n",
"139 0000-0003-1047-4229 1 1 1 \n",
"140 0000-0003-4836-7074 1 1 1 \n",
"\n",
" given_names family_name \\\n",
"0 davi barbosa \n",
"1 katarzyna ochman \n",
"3 juan pablo wolff mejia \n",
"4 trent hammond \n",
"5 antoniy moysey \n",
".. ... ... \n",
"135 issam bencheikh \n",
"136 guillermo ortiz \n",
"137 text protocol \n",
"139 bayu sakti \n",
"140 karla haydee ortiz palafox \n",
"\n",
" biography \\\n",
"0 pesquisador na área sociojurídica, professor, ... \n",
"1 katarzyna ochman [kataˈʐɨna ˈɔxman] is assista... \n",
"3 aspirante a maestría en derecho y negocios int... \n",
"4 mr trent hammond is an honorary research fello... \n",
"5 NaN \n",
".. ... \n",
"135 NaN \n",
"136 médico, internista, neumólogo, intensivista, e... \n",
"137 NaN \n",
"139 bayu purbha saktisaya adalah bayu purbha sakti... \n",
"140 karla haydee ortíz palafoxmiembro del sistema ... \n",
"\n",
" other_names \\\n",
"0 [professor davi barbosa delmont] \n",
"1 [[kataˈʐɨna ˈɔxman], catharina ochman, cathari... \n",
"3 [juan pablo wolff, pablo wolff mejia, juan p. ... \n",
"4 [trent ernest hammond (t.e.hammond)] \n",
"5 NaN \n",
".. ... \n",
"135 [issame1982, دكتور عصام بن الشيخ] \n",
"136 [guillermo ortiz-ruiz] \n",
"137 NaN \n",
"139 [bayu purbha sakti] \n",
"140 [karla palafox] \n",
"\n",
" urls \\\n",
"0 [[plataforma de cursos ideia criativa, https:/... \n",
"1 [[researchgate, https://www.researchgate.net/p... \n",
"3 [[twitter, https://twitter.com/pablomejiam], [... \n",
"4 [[academic support masters, http://trenthammon... \n",
"5 [[academic journals database, http://journalda... \n",
".. ... \n",
"135 [[my blog web site, http://issame1982.blogspot... \n",
"136 [[elsevier, https://www.elsevier.com/], [asoci... \n",
"137 [[about, https://about.me/textprotocol], [gith... \n",
"139 [[osf, http://osf.io/qe2ug], [inarxiv, https:/... \n",
"140 [[opinión día del maestro, http://www.cronicaj... \n",
"\n",
" primary_email ... n_doi n_arxiv n_pmc \\\n",
"0 NaN ... 0 0 0 \n",
"1 NaN ... 1 0 0 \n",
"3 juanpmejia@ulasallista.edu.co ... 0 0 0 \n",
"4 trent.hammond@academicsupportmasters.com.au ... 1 0 0 \n",
"5 antoniimoisei@bsmu.edu.ua ... 0 0 0 \n",
".. ... ... ... ... ... \n",
"135 NaN ... 0 0 0 \n",
"136 NaN ... 62 0 0 \n",
"137 NaN ... 0 0 0 \n",
"139 NaN ... 0 0 0 \n",
"140 NaN ... 0 0 0 \n",
"\n",
" n_other_pids label primary_email_domain \\\n",
"0 0 0 NaN \n",
"1 0 1 NaN \n",
"3 0 1 ulasallista.edu.co \n",
"4 1 1 academicsupportmasters.com.au \n",
"5 0 1 bsmu.edu.ua \n",
".. ... ... ... \n",
"135 0 1 NaN \n",
"136 88 0 NaN \n",
"137 0 0 NaN \n",
"139 0 1 NaN \n",
"140 2 1 NaN \n",
"\n",
" other_email_domains n_emails \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"3 NaN NaN \n",
"4 [health.nsw.gov.au, csu.edu.au, sociologist.co... 5.0 \n",
"5 NaN NaN \n",
".. ... ... \n",
"135 NaN NaN \n",
"136 NaN NaN \n",
"137 NaN NaN \n",
"139 NaN NaN \n",
"140 NaN NaN \n",
"\n",
" url_domains n_urls \n",
"0 [eadplataforma.com, facebook.com, youtube.com,... 39.0 \n",
"1 [researchgate.net, academia.edu, facebook.com,... 11.0 \n",
"3 [twitter.com, youtube.com, google.com, linkedi... 11.0 \n",
"4 [wix.com, academia.edu, researchgate.net, rese... 12.0 \n",
"5 [journaldatabase.info, nplu.org, acls.org, ind... 21.0 \n",
".. ... ... \n",
"135 [blogspot.com, researchgate.net, google.com, l... 12.0 \n",
"136 [elsevier.com, amci.org.co, springer.com, revi... 12.0 \n",
"137 [about.me, github.com, gitlab.com, gravatar.co... 12.0 \n",
"139 [osf.io, osf.io, academia.edu, mendeley.com, f... 12.0 \n",
"140 [cronicajalisco.com, youtube.com, tlaquepaque.... 22.0 \n",
"\n",
"[115 rows x 29 columns]"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep the rows whose works_source string contains the profile's given_names.\n",
"exploded_sources[\n",
"    exploded_sources.apply(lambda row: row['given_names'] in row['works_source'], axis=1)\n",
"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Works source"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Paste from Miriam"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## External IDs"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"External IDs should come from reliable sources. ORCID registrants cannot add them freely."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"# Number of external identifiers per profile; rows without external_ids stay NaN after index alignment.\n",
"df['n_ids'] = df.loc[df['external_ids'].notna(), 'external_ids'].str.len()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"count 1.301959e+06\n",
"mean 1.358640e+00\n",
"std 6.635087e-01\n",
"min 1.000000e+00\n",
"25% 1.000000e+00\n",
"50% 1.000000e+00\n",
"75% 2.000000e+00\n",
"max 8.000000e+01\n",
"Name: n_ids, dtype: float64"
2021-03-23 09:35:35 +01:00
]
},
2021-03-23 09:47:47 +01:00
"execution_count": 50,
2021-03-23 09:35:35 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"# Summary statistics of the per-profile external ID count.\n",
"df['n_ids'].describe()"
2021-03-23 09:35:35 +01:00
]
},
{
"cell_type": "code",
2021-03-23 09:47:47 +01:00
"execution_count": 51,
2021-03-23 19:03:37 +01:00
"metadata": {},
2021-03-23 09:35:35 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>claimed</th>\n",
" <th>verified_email</th>\n",
" <th>verified_primary_email</th>\n",
" <th>given_names</th>\n",
" <th>family_name</th>\n",
" <th>biography</th>\n",
" <th>other_names</th>\n",
" <th>urls</th>\n",
" <th>primary_email</th>\n",
" <th>...</th>\n",
2021-03-23 19:03:37 +01:00
" <th>n_arxiv</th>\n",
" <th>n_pmc</th>\n",
" <th>n_other_pids</th>\n",
" <th>label</th>\n",
2021-03-23 09:35:35 +01:00
" <th>primary_email_domain</th>\n",
" <th>other_email_domains</th>\n",
" <th>n_emails</th>\n",
" <th>url_domains</th>\n",
" <th>n_urls</th>\n",
2021-03-23 19:03:37 +01:00
" <th>n_ids</th>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>7253330</th>\n",
" <td>0000-0002-9554-6633</td>\n",
2021-03-23 09:35:35 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2021-03-23 19:03:37 +01:00
" <td>john a</td>\n",
" <td>williams</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[[aston university profile page, https://resea...</td>\n",
2021-03-23 09:35:35 +01:00
" <td>NaN</td>\n",
" <td>...</td>\n",
2021-03-23 19:03:37 +01:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>208</td>\n",
2021-03-23 09:47:47 +01:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-03-23 19:03:37 +01:00
" <td>[aston.ac.uk]</td>\n",
" <td>1.0</td>\n",
" <td>80.0</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
2021-03-23 19:03:37 +01:00
" </tbody>\n",
"</table>\n",
"<p>1 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" orcid claimed verified_email verified_primary_email \\\n",
"7253330 0000-0002-9554-6633 1 1 1 \n",
"\n",
" given_names family_name biography other_names \\\n",
"7253330 john a williams NaN NaN \n",
"\n",
" urls primary_email ... \\\n",
"7253330 [[aston university profile page, https://resea... NaN ... \n",
"\n",
" n_arxiv n_pmc n_other_pids label primary_email_domain \\\n",
"7253330 0 0 208 1 NaN \n",
"\n",
" other_email_domains n_emails url_domains n_urls n_ids \n",
"7253330 NaN NaN [aston.ac.uk] 1.0 80.0 \n",
"\n",
"[1 rows x 30 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Profile(s) holding the largest number of external identifiers.\n",
"df.loc[df['n_ids'] == df['n_ids'].max()]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# One row per (orcid, external id) pair; profiles without external ids keep a single NaN row.\n",
"ids = (\n",
"    df[['orcid', 'external_ids']]\n",
"    .explode('external_ids')\n",
"    .reset_index(drop=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"# The provider label is the first element of each external_ids entry.\n",
"# Labels are normalised (whitespace collapsed, trailing colon removed) because the raw data\n",
"# contains variants such as 'scopus author id', 'scopus author id:' and 'scopus author id: '\n",
"# that would otherwise be counted as distinct providers.\n",
"# NOTE(review): cells that match on the raw, un-normalised labels must be re-checked after this change.\n",
"ids['provider'] = (\n",
"    ids.loc[ids['external_ids'].notna(), 'external_ids']\n",
"    .apply(lambda pair: pair[0])\n",
"    .str.replace(r'\\s+', ' ', regex=True)\n",
"    .str.strip()\n",
"    .str.rstrip(':')\n",
"    .str.strip()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orcid</th>\n",
" <th>external_ids</th>\n",
" <th>provider</th>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
2021-03-23 19:03:37 +01:00
" </thead>\n",
" <tbody>\n",
2021-03-23 09:35:35 +01:00
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>7</th>\n",
" <td>0000-0001-7463-977x</td>\n",
" <td>[loop profile, 371409]</td>\n",
" <td>loop profile</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>9</th>\n",
" <td>0000-0001-8718-0056</td>\n",
" <td>[scopus author id, 55466912100]</td>\n",
" <td>scopus author id</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10</th>\n",
" <td>0000-0001-8718-0056</td>\n",
" <td>[scopus author id, 7102015452]</td>\n",
" <td>scopus author id</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>14</th>\n",
" <td>0000-0001-9708-5570</td>\n",
" <td>[researcherid, p-5112-2015]</td>\n",
" <td>researcherid</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>15</th>\n",
" <td>0000-0001-9708-5570</td>\n",
" <td>[scopus author id, 42062216900]</td>\n",
" <td>scopus author id</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid external_ids provider\n",
"7 0000-0001-7463-977x [loop profile, 371409] loop profile\n",
"9 0000-0001-8718-0056 [scopus author id, 55466912100] scopus author id\n",
"10 0000-0001-8718-0056 [scopus author id, 7102015452] scopus author id\n",
"14 0000-0001-9708-5570 [researcherid, p-5112-2015] researcherid\n",
"15 0000-0001-9708-5570 [scopus author id, 42062216900] scopus author id"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the rows that actually carry an external ID provider.\n",
"ids.dropna(subset=['provider']).head()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"scopus author id",
"researcherid",
"loop profile",
"ciência id",
"researcher name resolver id",
"中国科学家在线",
"sciprofile",
"isni",
"gnd",
"pitt id",
"technical university of denmark cwis",
"researcher id",
"id dialnet",
"digital author id",
"scopus author id: ",
"authenticusid",
"hku researcherpage",
"uow scholars",
"cti vitae",
"scopus author id:",
"hkust profile",
"chalmers id",
"scopus id",
"iauthor",
"google scholar",
"digital author id (dai)",
"authid",
"dai",
"us epa vivo",
"scopus id",
"authenticus",
"smithsonian profiles",
"github",
"escientist",
"vivo cornell",
"researcherid:",
"id dialnet:",
"dialnet id",
"sciprofiles",
"kaken",
"une researcher id",
"researcherid: ",
"orcid",
"scienceopen",
"profile system identifier",
"orcid id",
"custom"
],
"y": [
1030807,
544825,
117325,
36666,
7907,
4804,
4411,
3075,
2954,
2674,
2483,
1445,
1168,
1124,
1077,
869,
741,
646,
581,
548,
522,
430,
254,
212,
200,
177,
175,
155,
146,
127,
83,
61,
51,
49,
46,
39,
7,
6,
5,
5,
4,
3,
2,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "IDs provided by providers"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"874e7b29-a69b-483e-ab13-ee22b3def40e\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"874e7b29-a69b-483e-ab13-ee22b3def40e\")) { Plotly.newPlot( \"874e7b29-a69b-483e-ab13-ee22b3def40e\", [{\"type\": \"bar\", \"x\": [\"scopus author id\", \"researcherid\", \"loop profile\", \"ci\\u00eancia id\", \"researcher name resolver id\", \"\\u4e2d\\u56fd\\u79d1\\u5b66\\u5bb6\\u5728\\u7ebf\", \"sciprofile\", \"isni\", \"gnd\", \"pitt id\", \"technical university of denmark cwis\", \"researcher id\", \"id dialnet\", \"digital author id\", \"scopus author id: \", \"authenticusid\", \"hku researcherpage\", \"uow scholars\", \"cti vitae\", \"scopus author id:\", \"hkust profile\", \"chalmers id\", \"scopus id\", \"iauthor\", \"google scholar\", \"digital author id (dai)\", \"authid\", \"dai\", \"us epa vivo\", \"scopus id\", \"authenticus\", \"smithsonian profiles\", \"github\", \"escientist\", \"vivo cornell\", \"researcherid:\", \"id dialnet:\", \"dialnet id\", \"sciprofiles\", \"kaken\", \"une researcher id\", \"researcherid: \", \"orcid\", \"scienceopen\", \"profile system identifier\", \"orcid id\", \"custom\"], \"y\": [1030807, 544825, 117325, 36666, 7907, 4804, 4411, 3075, 2954, 2674, 2483, 1445, 1168, 1124, 1077, 869, 741, 646, 581, 548, 522, 430, 254, 212, 200, 177, 175, 155, 146, 127, 83, 61, 51, 49, 46, 39, 7, 6, 5, 5, 4, 3, 2, 1, 1, 1, 1]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", 
\"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"histogram\"}], \"histogram2d\": [{\"colorbar\": {\"outlinewidth\": 0, \"ti
" \n",
"var gd = document.getElementById('874e7b29-a69b-483e-ab13-ee22b3def40e');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
2021-03-23 09:47:47 +01:00
"\n",
2021-03-23 19:03:37 +01:00
" }) }; }); </script> </div>"
2021-03-23 09:47:47 +01:00
]
},
"metadata": {},
2021-03-23 19:03:37 +01:00
"output_type": "display_data"
2021-03-23 09:47:47 +01:00
}
],
"source": [
2021-03-23 19:03:37 +01:00
"data = [\n",
" go.Bar(\n",
" x=ids.groupby('provider').count().sort_values('orcid', ascending=False).index,\n",
" y=ids.groupby('provider').count().sort_values('orcid', ascending=False)['orcid']\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='IDs provided by providers',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
2021-03-23 09:47:47 +01:00
]
},
{
2021-03-23 19:03:37 +01:00
"cell_type": "code",
"execution_count": 56,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-23 19:03:37 +01:00
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'loop profile', 'scopus author id', 'researcherid',\n",
" 'scopus author id: ', 'gnd', 'isni', 'ciência id', 'pitt id',\n",
" 'id dialnet', 'technical university of denmark cwis',\n",
" 'researcher name resolver id', 'scopus author id:',\n",
" 'hkust profile', '中国科学家在线', 'cti vitae', 'escientist',\n",
" 'researcher id', 'sciprofile', 'digital author id', 'scopus id',\n",
" 'uow scholars', 'authenticusid', 'authenticus', 'authid',\n",
" 'hku researcherpage', 'chalmers id', 'iauthor', 'us epa vivo',\n",
" 'digital author id (dai)', 'vivo cornell', 'smithsonian profiles',\n",
" 'github', 'google scholar', 'scopus id', 'researcherid:', 'dai',\n",
" 'kaken', 'orcid id', 'dialnet id', 'profile system identifier',\n",
" 'sciprofiles', 'id dialnet:', 'researcherid: ', 'scienceopen',\n",
" 'une researcher id', 'custom', 'orcid'], dtype=object)"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 09:47:47 +01:00
"source": [
2021-03-23 19:03:37 +01:00
"pd.unique(ids['provider'])"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"## Keywords"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"This field is problematic as users can be nasty and put multiple keywords in one as opposed of having different keywords. Look this"
2021-03-23 09:47:47 +01:00
]
},
{
2021-03-23 19:03:37 +01:00
"cell_type": "code",
"execution_count": 57,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-23 19:03:37 +01:00
"outputs": [
{
"data": {
"text/plain": [
"['data science ',\n",
" 'science of science',\n",
" 'scholarly knowledge mining',\n",
" 'open science',\n",
" 'research infrastructures']"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
2021-03-23 09:47:47 +01:00
"source": [
2021-03-23 19:03:37 +01:00
"df[df['orcid'] == AM]['keywords'].values[0]"
2021-03-23 09:47:47 +01:00
]
},
{
2021-03-23 19:03:37 +01:00
"cell_type": "markdown",
2021-03-23 09:47:47 +01:00
"metadata": {},
"source": [
2021-03-23 19:03:37 +01:00
"I did a good job. The following instead is dirty"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 58,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-03-23 19:03:37 +01:00
"['open access, open science, libraries, repositories, social web,']"
2021-03-23 09:47:47 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 58,
2021-03-23 09:47:47 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df[df['orcid'] == PP]['keywords'].values[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So the keyword field needs some cleaning"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"def fix_keywords(lst):\n",
" fixed = set()\n",
" for k in lst:\n",
" tokens = set(k.split(','))\n",
"# tokens.remove('')\n",
" for t in tokens:\n",
" fixed.add(str.strip(t))\n",
" fixed.discard('')\n",
" return list(fixed)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"df['fixed_keywords'] = df[df.keywords.notna()]['keywords'].apply(lambda x: fix_keywords(x))"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 61,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [
{
"data": {
2021-03-23 09:35:35 +01:00
"text/plain": [
2021-03-23 19:03:37 +01:00
"['open science', 'repositories', 'open access', 'libraries', 'social web']"
2021-03-23 09:35:35 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 61,
2021-03-23 09:35:35 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df[df['orcid'] == PP]['fixed_keywords'].values[0]"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 62,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-23 19:03:37 +01:00
"df['n_keywords'] = df.keywords.str.len()"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 63,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-23 09:35:35 +01:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-03-23 09:47:47 +01:00
" <th>orcid</th>\n",
2021-03-23 19:03:37 +01:00
" <th>n_keywords</th>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
2021-03-23 09:47:47 +01:00
" </thead>\n",
" <tbody>\n",
2021-03-23 09:35:35 +01:00
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>2851081</th>\n",
" <td>0000-0002-0673-0341</td>\n",
" <td>154.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7344151</th>\n",
" <td>0000-0002-7060-4112</td>\n",
" <td>141.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2235440</th>\n",
" <td>0000-0002-6075-3501</td>\n",
" <td>140.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2994233</th>\n",
" <td>0000-0002-4071-0301</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3971323</th>\n",
" <td>0000-0002-9638-8091</td>\n",
" <td>115.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10916569</th>\n",
" <td>0000-0001-5692-7639</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916570</th>\n",
" <td>0000-0003-1539-0999</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916571</th>\n",
" <td>0000-0003-2858-5509</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916572</th>\n",
" <td>0000-0003-2438-9500</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:47:47 +01:00
" </tr>\n",
" <tr>\n",
2021-03-23 19:03:37 +01:00
" <th>10916573</th>\n",
" <td>0000-0003-4119-4772</td>\n",
" <td>NaN</td>\n",
2021-03-23 09:35:35 +01:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2021-03-23 19:03:37 +01:00
"<p>10916574 rows × 2 columns</p>\n",
2021-03-23 09:35:35 +01:00
"</div>"
],
"text/plain": [
2021-03-23 19:03:37 +01:00
" orcid n_keywords\n",
"2851081 0000-0002-0673-0341 154.0\n",
"7344151 0000-0002-7060-4112 141.0\n",
"2235440 0000-0002-6075-3501 140.0\n",
"2994233 0000-0002-4071-0301 118.0\n",
"3971323 0000-0002-9638-8091 115.0\n",
"... ... ...\n",
"10916569 0000-0001-5692-7639 NaN\n",
"10916570 0000-0003-1539-0999 NaN\n",
"10916571 0000-0003-2858-5509 NaN\n",
"10916572 0000-0003-2438-9500 NaN\n",
"10916573 0000-0003-4119-4772 NaN\n",
"\n",
"[10916574 rows x 2 columns]"
2021-03-23 09:47:47 +01:00
]
},
2021-03-23 19:03:37 +01:00
"execution_count": 63,
2021-03-23 09:47:47 +01:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-03-23 19:03:37 +01:00
"df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 64,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
2021-03-23 19:03:37 +01:00
"0000-0002-0673-0341",
"0000-0002-7060-4112",
"0000-0002-6075-3501",
"0000-0002-4071-0301",
"0000-0002-9638-8091",
"0000-0002-4235-4259",
"0000-0001-9462-5666",
"0000-0003-0076-6287",
"0000-0002-1878-9762",
"0000-0001-6537-7683",
"0000-0001-6307-6027",
"0000-0003-2273-9888",
"0000-0003-1799-0971",
"0000-0001-5287-1949",
"0000-0002-0937-7061",
"0000-0001-9715-9357",
"0000-0001-5696-1052",
"0000-0003-2998-5520",
"0000-0001-5869-2204",
"0000-0002-0156-3580",
"0000-0002-9625-6742",
"0000-0002-8401-8018",
"0000-0001-9985-1697",
"0000-0003-4246-8579",
"0000-0002-7710-0355",
"0000-0002-8083-7382",
"0000-0001-7654-5013",
"0000-0001-6939-3859",
"0000-0002-3061-3364",
"0000-0003-2509-2549",
"0000-0002-0463-0048",
"0000-0001-5230-715x",
"0000-0001-9336-6850",
"0000-0001-5458-7167",
"0000-0003-0209-180x",
"0000-0003-3584-6834",
"0000-0002-8227-5387",
"0000-0002-9381-2264",
"0000-0003-3340-6413",
"0000-0002-2935-1934",
"0000-0002-8644-8396",
"0000-0002-3123-3021",
"0000-0002-8659-6321",
"0000-0002-8449-2211",
"0000-0001-5167-7466",
"0000-0001-5637-1124",
"0000-0003-2532-2906",
"0000-0003-4673-1063",
"0000-0003-4608-3844",
"0000-0002-3532-043x",
"0000-0002-6347-9464",
"0000-0003-4505-3678",
"0000-0002-2683-4527",
"0000-0003-4374-6374",
"0000-0003-4511-7942",
"0000-0002-1103-9651",
"0000-0001-9280-6017",
"0000-0003-3720-1183",
"0000-0001-9586-0780",
"0000-0002-5306-7781",
"0000-0003-2218-1343",
"0000-0002-8499-1045",
"0000-0003-1863-0265",
"0000-0002-5539-1761",
"0000-0003-2550-1859",
"0000-0002-8072-1152",
"0000-0003-3342-6123",
"0000-0001-6861-9561",
"0000-0002-2252-672x",
"0000-0002-3597-3350",
"0000-0002-3907-3552",
"0000-0001-8689-185x",
"0000-0002-5274-7742",
"0000-0002-3186-8860",
"0000-0001-6843-9325",
"0000-0001-7133-7848",
"0000-0003-4486-2684",
"0000-0003-3343-5660",
"0000-0002-9014-2090",
"0000-0002-6282-0640",
"0000-0001-7857-4133",
"0000-0002-1294-2156",
"0000-0002-4432-3448",
"0000-0003-0097-4182",
"0000-0003-1245-7705",
"0000-0001-8445-412x",
"0000-0003-4153-6779",
"0000-0002-9125-6022",
"0000-0002-4598-2891",
"0000-0003-3387-3193",
"0000-0002-3866-6460",
"0000-0002-1411-3028",
"0000-0003-4283-2895",
"0000-0002-0211-7195",
"0000-0002-3898-9542",
"0000-0002-1545-7818",
"0000-0002-4963-9345",
"0000-0002-1770-9660",
"0000-0002-1960-5857",
"0000-0003-2054-477x"
2021-03-23 09:47:47 +01:00
],
"y": [
2021-03-23 19:03:37 +01:00
154,
141,
140,
118,
115,
104,
98,
94,
92,
91,
88,
86,
84,
2021-03-23 09:47:47 +01:00
82,
2021-03-23 19:03:37 +01:00
78,
77,
76,
75,
74,
73,
71,
70,
69,
66,
64,
62,
2021-03-23 09:47:47 +01:00
61,
2021-03-23 19:03:37 +01:00
60,
58,
57,
56,
54,
53,
53,
52,
51,
2021-03-23 09:47:47 +01:00
51,
2021-03-23 19:03:37 +01:00
51,
51,
50,
50,
50,
50,
49,
49,
2021-03-23 09:47:47 +01:00
49,
2021-03-23 19:03:37 +01:00
48,
48,
48,
48,
48,
48,
48,
47,
47,
46,
2021-03-23 09:47:47 +01:00
46,
2021-03-23 19:03:37 +01:00
46,
45,
44,
44,
44,
44,
44,
44,
43,
43,
42,
42,
42,
42,
42,
42,
42,
41,
41,
41,
41,
41,
41,
40,
40,
40,
40,
40,
40,
2021-03-23 09:47:47 +01:00
39,
2021-03-23 19:03:37 +01:00
39,
39,
39,
39,
39,
39,
39,
39,
39,
38,
38,
38,
38
2021-03-23 09:47:47 +01:00
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
2021-03-23 19:03:37 +01:00
},
"title": {
"text": "Keywords provided by ORCiD"
},
"xaxis": {
"range": [
-0.5,
99.5
],
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
"<div> <div id=\"9c9248f9-2926-487d-99ef-f05923c1f6bf\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"9c9248f9-2926-487d-99ef-f05923c1f6bf\")) { Plotly.newPlot( \"9c9248f9-2926-487d-99ef-f05923c1f6bf\", [{\"type\": \"bar\", \"x\": [\"0000-0002-0673-0341\", \"0000-0002-7060-4112\", \"0000-0002-6075-3501\", \"0000-0002-4071-0301\", \"0000-0002-9638-8091\", \"0000-0002-4235-4259\", \"0000-0001-9462-5666\", \"0000-0003-0076-6287\", \"0000-0002-1878-9762\", \"0000-0001-6537-7683\", \"0000-0001-6307-6027\", \"0000-0003-2273-9888\", \"0000-0003-1799-0971\", \"0000-0001-5287-1949\", \"0000-0002-0937-7061\", \"0000-0001-9715-9357\", \"0000-0001-5696-1052\", \"0000-0003-2998-5520\", \"0000-0001-5869-2204\", \"0000-0002-0156-3580\", \"0000-0002-9625-6742\", \"0000-0002-8401-8018\", \"0000-0001-9985-1697\", \"0000-0003-4246-8579\", \"0000-0002-7710-0355\", \"0000-0002-8083-7382\", \"0000-0001-7654-5013\", \"0000-0001-6939-3859\", \"0000-0002-3061-3364\", \"0000-0003-2509-2549\", \"0000-0002-0463-0048\", \"0000-0001-5230-715x\", \"0000-0001-9336-6850\", \"0000-0001-5458-7167\", \"0000-0003-0209-180x\", \"0000-0003-3584-6834\", \"0000-0002-8227-5387\", \"0000-0002-9381-2264\", \"0000-0003-3340-6413\", \"0000-0002-2935-1934\", \"0000-0002-8644-8396\", \"0000-0002-3123-3021\", \"0000-0002-8659-6321\", \"0000-0002-8449-2211\", \"0000-0001-5167-7466\", \"0000-0001-5637-1124\", \"0000-0003-2532-2906\", \"0000-0003-4673-1063\", \"0000-0003-4608-3844\", \"0000-0002-3532-043x\", \"0000-0002-6347-9464\", \"0000-0003-4505-3678\", \"0000-0002-2683-4527\", \"0000-0003-4374-6374\", \"0000-0003-4511-7942\", \"0000-0002-1103-9651\", \"0000-0001-9280-6017\", \"0000-0003-3720-1183\", \"0000-0001-9586-0780\", \"0000-0002-5306-7781\", \"0000-0003-2218-1343\", \"0000-0002-8499-1045\", \"0000-0003-1863-0265\", 
\"0000-0002-5539-1761\", \"0000-0003-2550-1859\", \"0000-0002-8072-1152\", \"0000-0003-3342-6123\", \"0000-0001-6861-9561\", \"0000-0002-2252-672x\", \"0000-0002-3597-3350\", \"0000-0002-3907-3552\", \"0000-0001-8689-185x\", \"0000-0002-5274-7742\", \"0000-0002-3186-8860\", \"0000-0001-6843-9325\", \"0000-0001-7133-7848\", \"0000-0003-4486-2684\", \"0000-0003-3343-5660\", \"0000-0002-9014-2090\", \"0000-0002-6282-0640\", \"0000-0001-7857-4133\", \"0000-0002-1294-2156\", \"0000-0002-4432-3448\", \"0000-0003-0097-4182\", \"0000-0003-1245-7705\", \"0000-0001-8445-412x\", \"0000-0003-4153-6779\", \"0000-0002-9125-6022\", \"0000-0002-4598-2891\", \"0000-0003-3387-3193\", \"0000-0002-3866-6460\", \"0000-0002-1411-3028\", \"0000-0003-4283-2895\", \"0000-0002-0211-7195\", \"0000-0002-3898-9542\", \"0000-0002-1545-7818\", \"0000-0002-4963-9345\", \"0000-0002-1770-9660\", \"0000-0002-1960-5857\", \"0000-0003-2054-477x\"], \"y\": [154.0, 141.0, 140.0, 118.0, 115.0, 104.0, 98.0, 94.0, 92.0, 91.0, 88.0, 86.0, 84.0, 82.0, 78.0, 77.0, 76.0, 75.0, 74.0, 73.0, 71.0, 70.0, 69.0, 66.0, 64.0, 62.0, 61.0, 60.0, 58.0, 57.0, 56.0, 54.0, 53.0, 53.0, 52.0, 51.0, 51.0, 51.0, 51.0, 50.0, 50.0, 50.0, 50.0, 49.0, 49.0, 49.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 47.0, 47.0, 46.0, 46.0, 46.0, 45.0, 44.0, 44.0, 44.0, 44.0, 44.0, 44.0, 43.0, 43.0, 42.0, 42.0, 42.0, 42.0, 42.0, 42.0, 42.0, 41.0, 41.0, 41.0, 41.0, 41.0, 41.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 39.0, 38.0, 38.0, 38.0, 38.0]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", \"gri
" \n",
"var gd = document.getElementById('9c9248f9-2926-487d-99ef-f05923c1f6bf');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
2021-03-23 12:13:04 +01:00
]
},
"metadata": {},
2021-03-23 19:03:37 +01:00
"output_type": "display_data"
2021-03-23 12:13:04 +01:00
}
],
"source": [
2021-03-23 19:03:37 +01:00
"set_top_n(100)\n",
"data = [\n",
" go.Bar(\n",
" x=df.sort_values('n_keywords', ascending=False)['orcid'][:TOP_N],\n",
" y=df.sort_values('n_keywords', ascending=False)['n_keywords'][:TOP_N]\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Keywords provided by ORCiD',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
2021-03-23 12:13:04 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 65,
2021-03-23 09:47:47 +01:00
"metadata": {},
"outputs": [],
"source": [
2021-03-23 19:03:37 +01:00
"grouped_keywords = df[['orcid', 'keywords']]\\\n",
" .explode('keywords')\\\n",
" .reset_index(drop=True)\\\n",
" .groupby('keywords')\\\n",
" .count()\\\n",
" .sort_values('orcid', ascending=False)"
2021-03-23 09:47:47 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 66,
2021-03-23 09:47:47 +01:00
"metadata": {},
2021-03-23 19:03:37 +01:00
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"linkText": "Export to plot.ly",
"plotlyServerURL": "https://plot.ly",
"showLink": false
},
"data": [
{
"type": "bar",
"x": [
"machine learning",
"bioinformatics",
"molecular biology",
"artificial intelligence",
"education",
"epidemiology",
"neuroscience",
"public health",
"cancer",
"immunology",
"microbiology",
"ecology",
"remote sensing",
"genetics",
"climate change",
"deep learning",
"genomics",
"biochemistry",
"data science",
"computer vision",
"psychology",
"sustainability",
"biotechnology",
"nanotechnology",
"robotics",
"data mining",
"statistics",
"image processing",
"gis",
"nutrition",
"chemistry",
"optimization",
"computer science",
"marketing",
"biomaterials",
"nanomaterials",
"renewable energy",
"organic chemistry",
"electrochemistry",
"educação",
"diabetes",
"analytical chemistry",
"innovation",
"materials science",
"mass spectrometry",
"architecture",
"evolution",
"epigenetics",
"physics",
"biomechanics"
2021-03-23 09:47:47 +01:00
],
"y": [
2021-03-23 19:03:37 +01:00
5090,
3299,
2377,
2322,
2218,
2121,
2010,
1973,
1956,
1829,
1805,
1788,
1688,
1648,
1621,
1485,
1451,
1398,
1363,
1339,
1331,
1313,
1298,
1294,
1149,
1135,
1134,
1104,
1075,
1071,
1064,
1042,
1037,
1030,
1013,
1010,
1005,
977,
977,
959,
958,
953,
937,
917,
910,
895,
892,
884,
883,
882
2021-03-23 09:47:47 +01:00
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
2021-03-23 19:03:37 +01:00
"text": "Top-50 keywords occurrence"
2021-03-23 09:47:47 +01:00
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"340562d0-8ded-4225-b510-32d718f8ce98\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"340562d0-8ded-4225-b510-32d718f8ce98\")) { Plotly.newPlot( \"340562d0-8ded-4225-b510-32d718f8ce98\", [{\"type\": \"bar\", \"x\": [\"machine learning\", \"bioinformatics\", \"molecular biology\", \"artificial intelligence\", \"education\", \"epidemiology\", \"neuroscience\", \"public health\", \"cancer\", \"immunology\", \"microbiology\", \"ecology\", \"remote sensing\", \"genetics\", \"climate change\", \"deep learning\", \"genomics\", \"biochemistry\", \"data science\", \"computer vision\", \"psychology\", \"sustainability\", \"biotechnology\", \"nanotechnology\", \"robotics\", \"data mining\", \"statistics\", \"image processing\", \"gis\", \"nutrition\", \"chemistry\", \"optimization\", \"computer science\", \"marketing\", \"biomaterials\", \"nanomaterials\", \"renewable energy\", \"organic chemistry\", \"electrochemistry\", \"educa\\u00e7\\u00e3o\", \"diabetes\", \"analytical chemistry\", \"innovation\", \"materials science\", \"mass spectrometry\", \"architecture\", \"evolution\", \"epigenetics\", \"physics\", \"biomechanics\"], \"y\": [5090, 3299, 2377, 2322, 2218, 2121, 2010, 1973, 1956, 1829, 1805, 1788, 1688, 1648, 1621, 1485, 1451, 1398, 1363, 1339, 1331, 1313, 1298, 1294, 1149, 1135, 1134, 1104, 1075, 1071, 1064, 1042, 1037, 1030, 1013, 1010, 1005, 977, 977, 959, 958, 953, 937, 917, 910, 895, 892, 884, 883, 882]}], {\"template\": {\"data\": {\"bar\": [{\"error_x\": {\"color\": \"#2a3f5f\"}, \"error_y\": {\"color\": \"#2a3f5f\"}, \"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"bar\"}], \"barpolar\": [{\"marker\": {\"line\": {\"color\": \"#E5ECF6\", \"width\": 0.5}}, \"type\": \"barpolar\"}], \"carpet\": [{\"aaxis\": {\"endlinecolor\": \"#2a3f5f\", 
\"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"baxis\": {\"endlinecolor\": \"#2a3f5f\", \"gridcolor\": \"white\", \"linecolor\": \"white\", \"minorgridcolor\": \"white\", \"startlinecolor\": \"#2a3f5f\"}, \"type\": \"carpet\"}], \"choropleth\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"choropleth\"}], \"contour\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"contour\"}], \"contourcarpet\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"type\": \"contourcarpet\"}], \"heatmap\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmap\"}], \"heatmapgl\": [{\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}, \"colorscale\": [[0.0, \"#0d0887\"], [0.1111111111111111, \"#46039f\"], [0.2222222222222222, \"#7201a8\"], [0.3333333333333333, \"#9c179e\"], [0.4444444444444444, \"#bd3786\"], [0.5555555555555556, \"#d8576b\"], [0.6666666666666666, \"#ed7953\"], [0.7777777777777778, \"#fb9f3a\"], [0.8888888888888888, \"#fdca26\"], [1.0, \"#f0f921\"]], \"type\": \"heatmapgl\"}], \"histogram\": [{\"marker\": {\"colorbar\": {\"outlinewidth\": 0, \"ticks\": \"\"}}, \"type\": \"hi
2021-03-23 09:47:47 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('340562d0-8ded-4225-b510-32d718f8ce98');\n",
2021-03-23 09:47:47 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
2021-03-23 09:35:35 +01:00
"\n",
2021-03-23 09:47:47 +01:00
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
2021-03-23 09:35:35 +01:00
"\n",
2021-03-23 09:47:47 +01:00
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
2021-03-23 09:35:35 +01:00
"\n",
2021-03-23 09:47:47 +01:00
" }) }; }); </script> </div>"
2021-03-23 09:35:35 +01:00
]
},
"metadata": {},
2021-03-23 09:47:47 +01:00
"output_type": "display_data"
2021-03-23 09:35:35 +01:00
}
],
2021-03-23 12:13:04 +01:00
"source": [
"set_top_n(50)\n",
"data = [\n",
" go.Bar(\n",
2021-03-23 19:03:37 +01:00
" x=grouped_keywords.index[:TOP_N],\n",
" y=grouped_keywords['orcid'][:TOP_N]\n",
2021-03-23 12:13:04 +01:00
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Top-%s keywords occurrence' % TOP_N,\n",
2021-03-23 09:47:47 +01:00
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"fig = go.Figure(data=data, layout=layout)\n",
"plotly.offline.iplot(fig)"
2021-03-23 09:35:35 +01:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2021-03-23 09:47:47 +01:00
"## Correlation"
2021-03-23 09:35:35 +01:00
]
},
{
"cell_type": "code",
2021-03-23 19:03:37 +01:00
"execution_count": 67,
2021-03-23 09:35:35 +01:00
"metadata": {},
2021-03-18 17:43:00 +01:00
"outputs": [
{
"data": {
2021-03-23 09:47:47 +01:00
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"coloraxis": "coloraxis",
"hovertemplate": "x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>",
"name": "0",
"type": "heatmap",
"x": [
"claimed",
"verified_email",
"verified_primary_email",
"n_works",
2021-03-23 19:03:37 +01:00
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
2021-03-23 09:47:47 +01:00
"n_emails",
"n_urls",
"n_ids",
2021-03-23 19:03:37 +01:00
"n_keywords"
2021-03-23 09:47:47 +01:00
],
"xaxis": "x",
"y": [
"claimed",
"verified_email",
"verified_primary_email",
"n_works",
2021-03-23 19:03:37 +01:00
"n_doi",
"n_arxiv",
"n_pmc",
"n_other_pids",
"label",
2021-03-23 09:47:47 +01:00
"n_emails",
"n_urls",
"n_ids",
2021-03-23 19:03:37 +01:00
"n_keywords"
2021-03-23 09:47:47 +01:00
],
"yaxis": "y",
"z": [
[
2021-03-23 19:03:37 +01:00
null,
null,
null,
null,
2021-03-23 12:13:04 +01:00
null,
2021-03-23 09:47:47 +01:00
null,
null,
null,
null,
null,
null,
null,
null
],
[
null,
1,
2021-03-23 19:03:37 +01:00
0.976162114192081,
0.0643465684083431,
0.06256365939443789,
0.004813567718545522,
0.022837768079663843,
0.0502787806136592,
0.1653735105575277,
0.011077499054971796,
0.01616663985261792,
0.0878909499747942,
0.01702128763665974
],
[
null,
0.976162114192081,
1,
0.06550520343326285,
0.06358869411827689,
0.004936396077751568,
0.0233694641294265,
0.05107779193878439,
0.1673817151653653,
0.009088965040421332,
0.016748133920736267,
0.0891344788642304,
0.01757470891436621
],
[
null,
0.0643465684083431,
0.06550520343326285,
1,
0.9422006818527181,
0.2510908375242561,
0.3452995689789822,
0.8507066573191994,
0.2193240629073337,
0.04012080330827889,
0.050551529905608905,
0.24172815699206937,
0.030991434187229255
],
[
null,
0.06256365939443789,
0.06358869411827689,
0.9422006818527181,
1,
0.2859256869913497,
0.35237093272562453,
0.8207407571730607,
0.20528211794680093,
0.03066074418509789,
0.02249574354972604,
0.22699396525236162,
0.029924481047131947
],
[
null,
0.004813567718545522,
0.004936396077751568,
0.2510908375242561,
0.2859256869913497,
1,
-0.0016834274457045173,
0.18858664921353688,
0.013625663523574786,
0.001327784437475615,
-0.0029341825430676785,
0.00551756083696426,
-0.0010622024587917253
2021-03-23 09:47:47 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.022837768079663843,
0.0233694641294265,
0.3452995689789822,
0.35237093272562453,
-0.0016834274457045173,
2021-03-23 09:47:47 +01:00
1,
2021-03-23 19:03:37 +01:00
0.2813216452145862,
0.06643990962625554,
0.0034751570518647148,
0.00017861447927955607,
0.06890564721203783,
0.02385706062724447
2021-03-23 09:47:47 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.0502787806136592,
0.05107779193878439,
0.8507066573191994,
0.8207407571730607,
0.18858664921353688,
0.2813216452145862,
2021-03-23 09:47:47 +01:00
1,
2021-03-23 19:03:37 +01:00
0.16718354633724933,
0.020109885973996034,
0.008514311952439128,
0.23592717383229878,
0.023831895109099027
2021-03-23 09:47:47 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.1653735105575277,
0.1673817151653653,
0.2193240629073337,
0.20528211794680093,
0.013625663523574786,
0.06643990962625554,
0.16718354633724933,
2021-03-23 09:47:47 +01:00
1,
2021-03-23 19:03:37 +01:00
0.018556742838561485,
0.017316375665650897,
0.2141058845072524,
0.05566397359882779
2021-03-23 09:47:47 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.011077499054971796,
0.009088965040421332,
0.04012080330827889,
0.03066074418509789,
0.001327784437475615,
0.0034751570518647148,
0.020109885973996034,
0.018556742838561485,
2021-03-23 09:47:47 +01:00
1,
2021-03-23 19:03:37 +01:00
0.09408460253059668,
0.0452261998698129,
0.048598477595472214
2021-03-23 09:47:47 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.01616663985261792,
0.016748133920736267,
0.050551529905608905,
0.02249574354972604,
-0.0029341825430676785,
0.00017861447927955607,
0.008514311952439128,
0.017316375665650897,
0.09408460253059668,
2021-03-23 09:47:47 +01:00
1,
2021-03-23 19:03:37 +01:00
0.06946298201611972,
0.15905312700498755
2021-03-23 09:47:47 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.0878909499747942,
0.0891344788642304,
0.24172815699206937,
0.22699396525236162,
0.00551756083696426,
0.06890564721203783,
0.23592717383229878,
0.2141058845072524,
0.0452261998698129,
0.06946298201611972,
2021-03-23 12:13:04 +01:00
1,
2021-03-23 19:03:37 +01:00
0.06380885961518437
2021-03-23 12:13:04 +01:00
],
[
null,
2021-03-23 19:03:37 +01:00
0.01702128763665974,
0.01757470891436621,
0.030991434187229255,
0.029924481047131947,
-0.0010622024587917253,
0.02385706062724447,
0.023831895109099027,
0.05566397359882779,
0.048598477595472214,
0.15905312700498755,
0.06380885961518437,
2021-03-23 09:47:47 +01:00
1
]
]
}
],
"layout": {
"coloraxis": {
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"margin": {
"t": 60
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"xaxis": {
"anchor": "y",
"constrain": "domain",
"domain": [
0,
1
],
"scaleanchor": "y"
},
"yaxis": {
"anchor": "x",
"autorange": "reversed",
"constrain": "domain",
"domain": [
0,
1
]
}
}
},
2021-03-22 19:08:20 +01:00
"text/html": [
2021-03-23 19:03:37 +01:00
"<div> <div id=\"0e5b7e4b-0689-4a4d-a5fa-fa446c871342\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"0e5b7e4b-0689-4a4d-a5fa-fa446c871342\")) { Plotly.newPlot( \"0e5b7e4b-0689-4a4d-a5fa-fa446c871342\", [{\"coloraxis\": \"coloraxis\", \"hovertemplate\": \"x: %{x}<br>y: %{y}<br>color: %{z}<extra></extra>\", \"name\": \"0\", \"type\": \"heatmap\", \"x\": [\"claimed\", \"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\"], \"xaxis\": \"x\", \"y\": [\"claimed\", \"verified_email\", \"verified_primary_email\", \"n_works\", \"n_doi\", \"n_arxiv\", \"n_pmc\", \"n_other_pids\", \"label\", \"n_emails\", \"n_urls\", \"n_ids\", \"n_keywords\"], \"yaxis\": \"y\", \"z\": [[null, null, null, null, null, null, null, null, null, null, null, null, null], [null, 1.0, 0.976162114192081, 0.0643465684083431, 0.06256365939443789, 0.004813567718545522, 0.022837768079663843, 0.0502787806136592, 0.1653735105575277, 0.011077499054971796, 0.01616663985261792, 0.0878909499747942, 0.01702128763665974], [null, 0.976162114192081, 1.0, 0.06550520343326285, 0.06358869411827689, 0.004936396077751568, 0.0233694641294265, 0.05107779193878439, 0.1673817151653653, 0.009088965040421332, 0.016748133920736267, 0.0891344788642304, 0.01757470891436621], [null, 0.0643465684083431, 0.06550520343326285, 1.0, 0.9422006818527181, 0.2510908375242561, 0.3452995689789822, 0.8507066573191994, 0.2193240629073337, 0.04012080330827889, 0.050551529905608905, 0.24172815699206937, 0.030991434187229255], [null, 0.06256365939443789, 0.06358869411827689, 0.9422006818527181, 1.0, 0.2859256869913497, 0.35237093272562453, 0.8207407571730607, 0.20528211794680093, 0.03066074418509789, 0.02249574354972604, 0.22699396525236162, 
0.029924481047131947], [null, 0.004813567718545522, 0.004936396077751568, 0.2510908375242561, 0.2859256869913497, 1.0, -0.0016834274457045173, 0.18858664921353688, 0.013625663523574786, 0.001327784437475615, -0.0029341825430676785, 0.00551756083696426, -0.0010622024587917253], [null, 0.022837768079663843, 0.0233694641294265, 0.3452995689789822, 0.35237093272562453, -0.0016834274457045173, 1.0, 0.2813216452145862, 0.06643990962625554, 0.0034751570518647148, 0.00017861447927955607, 0.06890564721203783, 0.02385706062724447], [null, 0.0502787806136592, 0.05107779193878439, 0.8507066573191994, 0.8207407571730607, 0.18858664921353688, 0.2813216452145862, 1.0, 0.16718354633724933, 0.020109885973996034, 0.008514311952439128, 0.23592717383229878, 0.023831895109099027], [null, 0.1653735105575277, 0.1673817151653653, 0.2193240629073337, 0.20528211794680093, 0.013625663523574786, 0.06643990962625554, 0.16718354633724933, 1.0, 0.018556742838561485, 0.017316375665650897, 0.2141058845072524, 0.05566397359882779], [null, 0.011077499054971796, 0.009088965040421332, 0.04012080330827889, 0.03066074418509789, 0.001327784437475615, 0.0034751570518647148, 0.020109885973996034, 0.018556742838561485, 1.0, 0.09408460253059668, 0.0452261998698129, 0.048598477595472214], [null, 0.01616663985261792, 0.016748133920736267, 0.050551529905608905, 0.02249574354972604, -0.0029341825430676785, 0.00017861447927955607, 0.008514311952439128, 0.017316375665650897, 0.09408460253059668, 1.0, 0.06946298201611972, 0.15905312700498755], [null, 0.0878909499747942, 0.0891344788642304, 0.24172815699206937, 0.22699396525236162, 0.00551756083696426, 0.06890564721203783, 0.23592717383229878, 0.2141058845072524, 0.0452261998698129, 0.06946298201611972, 1.0, 0.06380885961518437], [null, 0.01702128763665974, 0.01757470891436621, 0.030991434187229255, 0.029924481047131947, -0.0010622024587917253, 0.02385706062724447, 0.023831895109099027, 0.05566397359
2021-03-23 09:47:47 +01:00
" \n",
2021-03-23 19:03:37 +01:00
"var gd = document.getElementById('0e5b7e4b-0689-4a4d-a5fa-fa446c871342');\n",
2021-03-23 09:47:47 +01:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
2021-03-23 09:35:35 +01:00
"\n",
2021-03-23 09:47:47 +01:00
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
2021-03-23 09:35:35 +01:00
"\n",
2021-03-23 09:47:47 +01:00
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
2021-03-23 09:35:35 +01:00
"\n",
2021-03-23 09:47:47 +01:00
" }) }; }); </script> </div>"
2021-03-18 17:43:00 +01:00
]
},
"metadata": {},
2021-03-23 09:47:47 +01:00
"output_type": "display_data"
2021-03-18 17:43:00 +01:00
}
],
"source": [
2021-03-22 19:08:20 +01:00
"fig = px.imshow(df[df.n_ids > 0].corr())\n",
"fig.show()"
2021-03-18 17:43:00 +01:00
]
},
2021-03-23 19:03:37 +01:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2021-03-18 17:43:00 +01:00
{
"cell_type": "code",
2021-03-22 19:08:20 +01:00
"execution_count": null,
2021-03-18 17:43:00 +01:00
"metadata": {},
"outputs": [],
2021-03-22 19:08:20 +01:00
"source": []
2021-03-18 17:43:00 +01:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}