{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploratory analysis" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "TODO:\n", "- Understanding the reason for fake profiles can bring insight on how to catch them (could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n", "- Enumerate the recurring cases (e.g., author publishing with an empty ORCID profile, author publishing but not on OpenAIRE, etc.)\n", "- Is the temporal dimension of any use?\n", "- Can we access private info thanks to the OpenAIRE-ORCID agreement?\n" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import ast\n", "import tldextract\n", "import numpy\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "init_notebook_mode(connected=True)\n", "\n", "# Shared state for the 'top N' bar charts; always change it through set_top_n().\n", "TOP_N = 0\n", "TOP_RANGE = [0, 0]\n", "\n", "def set_top_n(n):\n", "    \"\"\"Select how many leading entries the 'top N' bar charts display.\n", "\n", "    Updates two module-level globals read by the plotting cells:\n", "    TOP_N     -- slice length applied to the sorted data;\n", "    TOP_RANGE -- x-axis range [-0.5, n - 0.5], i.e. bar positions 0..n-1\n", "                 padded by half a unit on each side.\n", "    \"\"\"\n", "    global TOP_N, TOP_RANGE\n", "    TOP_N = n\n", "    TOP_RANGE = [-.5, n - 1 + .5]" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Notable solid ORCID iDs for exploratory purposes:" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "AM = '0000-0002-5193-7851'\n", "PP = '0000-0002-8588-4196'\n" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Notable anomalies:" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "JOURNAL = '0000-0003-1815-5732'\n", "NOINFO = '0000-0001-5009-2052'\n", "VALID_NO_OA = '0000-0002-5154-6404' # True profile, but not in OpenAIRE\n", "# TODO: find a group-shared ORCID iD, if possible" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Notable fake ORCID iDs:" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "SCAFFOLD = '0000-0001-5004-7761'\n", "WHATSAPP = '0000-0001-6997-9470'\n", "PENIS = '0000-0002-3399-7287'\n", "BITCOIN = '0000-0002-7518-6845'\n", "# The dataset stores ORCID iDs lowercased (checksum 'x', not 'X'), so the\n", "# constant must be lowercase for `df['orcid'] == FITNESS_CHINA` to match.\n", "FITNESS_CHINA = '0000-0002-1234-835x' # URL record + employment\n", "CANNABIS = '0000-0002-9025-8632' # URL > 70 + works (REMOVED)\n", "PLUMBER = '0000-0002-1700-8311' # URL > 10 + works " ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Load the dataset" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...employmentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabel
00000-0001-5009-2052111NaNNaNNaNNaNNaNNaN...NaN0NaN2019-06-05t20:25:43.066z2019-12-11t03:57:41.741z00000
10000-0001-5943-0732111NaNNaNNaNNaNNaNNaN...NaN0NaN2015-08-18t13:10:42.871z2016-06-15t01:05:19.986z00000
20000-0001-6083-622x111NaNNaNNaNNaNNaNNaN...NaN0NaN2019-01-21t10:55:27.997z2019-01-28t16:24:02.199z00000
30000-0001-6262-5709111NaNNaNNaNNaNNaNNaN...NaN0NaN2015-08-18t14:29:39.440z2017-06-21t07:18:20.787z00000
40000-0001-6616-4890111NaNNaNNaNNaNNaNNaN...NaN0NaN2015-08-13t01:59:51.802z2016-06-15t01:05:21.373z00000
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "0 0000-0001-5009-2052 1 1 1 \n", "1 0000-0001-5943-0732 1 1 1 \n", "2 0000-0001-6083-622x 1 1 1 \n", "3 0000-0001-6262-5709 1 1 1 \n", "4 0000-0001-6616-4890 1 1 1 \n", "\n", " given_names family_name biography other_names urls primary_email ... \\\n", "0 NaN NaN NaN NaN NaN NaN ... \n", "1 NaN NaN NaN NaN NaN NaN ... \n", "2 NaN NaN NaN NaN NaN NaN ... \n", "3 NaN NaN NaN NaN NaN NaN ... \n", "4 NaN NaN NaN NaN NaN NaN ... \n", "\n", " employment n_works works_source activation_date \\\n", "0 NaN 0 NaN 2019-06-05t20:25:43.066z \n", "1 NaN 0 NaN 2015-08-18t13:10:42.871z \n", "2 NaN 0 NaN 2019-01-21t10:55:27.997z \n", "3 NaN 0 NaN 2015-08-18t14:29:39.440z \n", "4 NaN 0 NaN 2015-08-13t01:59:51.802z \n", "\n", " last_update_date n_doi n_arxiv n_pmc n_other_pids label \n", "0 2019-12-11t03:57:41.741z 0 0 0 0 0 \n", "1 2016-06-15t01:05:19.986z 0 0 0 0 0 \n", "2 2019-01-28t16:24:02.199z 0 0 0 0 0 \n", "3 2017-06-21t07:18:20.787z 0 0 0 0 0 \n", "4 2016-06-15t01:05:21.373z 0 0 0 0 0 \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_pickle('../data/processed/dataset.pkl')\n", "df.head(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Notable profiles inspection" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...employmentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabel
15758690000-0002-5193-7851111andreamannoccidata scientist & researcher; scholarly knowled...NaN[[personal website, https://andremann.github.i...andrea.mannocci@isti.cnr.it...[[research associate, istituto di scienza e te...37[scopus - elsevier, crossref metadata search, ...2017-09-12t14:28:33.467z2021-03-09t08:32:47.840z3400601
\n", "

1 rows × 24 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "1575869 0000-0002-5193-7851 1 1 1 \n", "\n", " given_names family_name \\\n", "1575869 andrea mannocci \n", "\n", " biography other_names \\\n", "1575869 data scientist & researcher; scholarly knowled... NaN \n", "\n", " urls \\\n", "1575869 [[personal website, https://andremann.github.i... \n", "\n", " primary_email ... \\\n", "1575869 andrea.mannocci@isti.cnr.it ... \n", "\n", " employment n_works \\\n", "1575869 [[research associate, istituto di scienza e te... 37 \n", "\n", " works_source \\\n", "1575869 [scopus - elsevier, crossref metadata search, ... \n", "\n", " activation_date last_update_date n_doi n_arxiv \\\n", "1575869 2017-09-12t14:28:33.467z 2021-03-09t08:32:47.840z 34 0 \n", "\n", " n_pmc n_other_pids label \n", "1575869 0 60 1 \n", "\n", "[1 rows x 24 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['orcid'] == AM]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...employmentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabel
68199860000-0001-6997-9470111otherwhatsappNaNNaN[[otherwhatsapp, https://otherwhatsapp.com/], ...NaN...NaN0NaN2020-10-07t10:37:12.237z2020-10-08t02:32:03.935z00000
\n", "

1 rows × 24 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "6819986 0000-0001-6997-9470 1 1 1 \n", "\n", " given_names family_name biography other_names \\\n", "6819986 other whatsapp NaN NaN \n", "\n", " urls primary_email ... \\\n", "6819986 [[otherwhatsapp, https://otherwhatsapp.com/], ... NaN ... \n", "\n", " employment n_works works_source activation_date \\\n", "6819986 NaN 0 NaN 2020-10-07t10:37:12.237z \n", "\n", " last_update_date n_doi n_arxiv n_pmc n_other_pids label \n", "6819986 2020-10-08t02:32:03.935z 0 0 0 0 0 \n", "\n", "[1 rows x 24 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['orcid'] == WHATSAPP]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "orcid 10916574\n", "claimed 10916574\n", "verified_email 10916574\n", "verified_primary_email 10916574\n", "given_names 10886150\n", "family_name 10601571\n", "biography 348649\n", "other_names 551482\n", "urls 707687\n", "primary_email 123851\n", "other_emails 48306\n", "keywords 646400\n", "external_ids 1301959\n", "education 2430233\n", "employment 2665092\n", "n_works 10916574\n", "works_source 2721431\n", "activation_date 10916574\n", "last_update_date 10916574\n", "n_doi 10916574\n", "n_arxiv 10916574\n", "n_pmc 10916574\n", "n_other_pids 10916574\n", "label 10916574\n", "dtype: int64" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.count()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 10916574\n", "unique 10916574\n", "top 0000-0002-5454-7613\n", "freq 1\n", "Name: orcid, dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['orcid'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Primary email" ] }, { "cell_type": "code", "execution_count": 20, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 123851\n", "unique 123848\n", "top patrick.davey@monash.edu\n", "freq 2\n", "Name: primary_email, dtype: object" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['primary_email'].describe()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Duplicate primary emails (second and later occurrences of each address)" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6347224 maykin@owasp.org\n", "7027865 patrick.davey@monash.edu\n", "9529005 opercin@erbakan.edu.tr\n", "Name: primary_email, dtype: object" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Drop missing addresses first, then keep only the repeats; the first\n", "# occurrence of each address is not flagged by duplicated().\n", "df['primary_email'].dropna().loc[lambda emails: emails.duplicated()]" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domain
44500460000-0001-9855-1676111maykinwarasartNaNNaNNaNmaykin@owasp.org...0NaN2020-10-23t17:51:51.925z2021-01-01t15:00:52.053z00000owasp.org
63472240000-0002-0836-2271111maykinwarasartNaNNaNNaNmaykin@owasp.org...0NaN2020-09-15t04:43:55.709z2020-09-15t05:17:28.509z00000owasp.org
\n", "

2 rows × 25 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "4450046 0000-0001-9855-1676 1 1 1 \n", "6347224 0000-0002-0836-2271 1 1 1 \n", "\n", " given_names family_name biography other_names urls primary_email \\\n", "4450046 maykin warasart NaN NaN NaN maykin@owasp.org \n", "6347224 maykin warasart NaN NaN NaN maykin@owasp.org \n", "\n", " ... n_works works_source activation_date \\\n", "4450046 ... 0 NaN 2020-10-23t17:51:51.925z \n", "6347224 ... 0 NaN 2020-09-15t04:43:55.709z \n", "\n", " last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n", "4450046 2021-01-01t15:00:52.053z 0 0 0 0 0 \n", "6347224 2020-09-15t05:17:28.509z 0 0 0 0 0 \n", "\n", " primary_email_domain \n", "4450046 owasp.org \n", "6347224 owasp.org \n", "\n", "[2 rows x 25 columns]" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['primary_email'] == 'maykin@owasp.org']" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domain
68407910000-0002-2232-9638111osmanperçinNaNNaNNaNopercin@erbakan.edu.tr...0NaN2015-01-12t13:47:55.549z2020-01-27t07:38:24.269z00000erbakan.edu.tr
95290050000-0003-0033-0918111osmanperçinNaNNaNNaNopercin@erbakan.edu.tr...0NaN2015-10-13t05:47:12.014z2020-12-25t13:52:03.976z00000erbakan.edu.tr
\n", "

2 rows × 25 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "6840791 0000-0002-2232-9638 1 1 1 \n", "9529005 0000-0003-0033-0918 1 1 1 \n", "\n", " given_names family_name biography other_names urls \\\n", "6840791 osman perçin NaN NaN NaN \n", "9529005 osman perçin NaN NaN NaN \n", "\n", " primary_email ... n_works works_source \\\n", "6840791 opercin@erbakan.edu.tr ... 0 NaN \n", "9529005 opercin@erbakan.edu.tr ... 0 NaN \n", "\n", " activation_date last_update_date n_doi n_arxiv \\\n", "6840791 2015-01-12t13:47:55.549z 2020-01-27t07:38:24.269z 0 0 \n", "9529005 2015-10-13t05:47:12.014z 2020-12-25t13:52:03.976z 0 0 \n", "\n", " n_pmc n_other_pids label primary_email_domain \n", "6840791 0 0 0 erbakan.edu.tr \n", "9529005 0 0 0 erbakan.edu.tr \n", "\n", "[2 rows x 25 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['primary_email'] == 'opercin@erbakan.edu.tr']" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domain
9449930000-0002-9158-1757111patrickdaveyNaNNaNNaNpatrick.davey@monash.edu...0NaN2019-05-09t23:01:02.170z2019-08-20t03:00:17.844z00000monash.edu
70278650000-0002-8774-0030111patrickdaveyNaNNaNNaNpatrick.davey@monash.edu...1[crossref]2018-09-11t10:47:10.997z2021-02-09t06:21:44.138z10001monash.edu
\n", "

2 rows × 25 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "944993 0000-0002-9158-1757 1 1 1 \n", "7027865 0000-0002-8774-0030 1 1 1 \n", "\n", " given_names family_name biography other_names urls \\\n", "944993 patrick davey NaN NaN NaN \n", "7027865 patrick davey NaN NaN NaN \n", "\n", " primary_email ... n_works works_source \\\n", "944993 patrick.davey@monash.edu ... 0 NaN \n", "7027865 patrick.davey@monash.edu ... 1 [crossref] \n", "\n", " activation_date last_update_date n_doi n_arxiv \\\n", "944993 2019-05-09t23:01:02.170z 2019-08-20t03:00:17.844z 0 0 \n", "7027865 2018-09-11t10:47:10.997z 2021-02-09t06:21:44.138z 1 0 \n", "\n", " n_pmc n_other_pids label primary_email_domain \n", "944993 0 0 0 monash.edu \n", "7027865 0 0 1 monash.edu \n", "\n", "[2 rows x 25 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['primary_email'] == 'patrick.davey@monash.edu']" ] },
{ "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "# Domain = everything after the LAST '@' (a quoted local part may itself contain '@').\n", "# Missing addresses stay NaN; an address with no '@' or nothing after it becomes NaN\n", "# instead of raising IndexError as the former split('@')[1] did.\n", "# NOTE(review): relies on the column holding strings/NaN (object dtype) - confirm.\n", "df['primary_email_domain'] = df['primary_email'].str.extract(r'@([^@]+)$', expand=False)" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 123851\n", "unique 17089\n", "top gmail.com\n", "freq 26540\n", "Name: primary_email_domain, dtype: object" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['primary_email_domain'].describe()" ] },
{ "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcid
primary_email_domain
gmail.com26540
hotmail.com3769
yahoo.com2614
163.com2109
yuhs.ac1132
......
imean-biotech.com1
imec.msu.ru1
imedea.uib-csic.es1
imes.uni-hannover.de1
zzuli.edu.cn1
\n", "

17089 rows × 1 columns

\n", "
" ], "text/plain": [ " orcid\n", "primary_email_domain \n", "gmail.com 26540\n", "hotmail.com 3769\n", "yahoo.com 2614\n", "163.com 2109\n", "yuhs.ac 1132\n", "... ...\n", "imean-biotech.com 1\n", "imec.msu.ru 1\n", "imedea.uib-csic.es 1\n", "imes.uni-hannover.de 1\n", "zzuli.edu.cn 1\n", "\n", "[17089 rows x 1 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "primary_emails = df[['primary_email_domain', 'orcid']].groupby('primary_email_domain').count().sort_values('orcid', ascending=False)\n", "primary_emails" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "gmail.com", "hotmail.com", "yahoo.com", "163.com", "yuhs.ac", "qq.com", "outlook.com", "126.com", "bu.edu", "usgs.gov", "mail.ru", "yahoo.com.br", "usp.br", "ua.pt", "umich.edu", "ust.hk", "foxmail.com", "uomustansiriyah.edu.iq", "yandex.ru", "uq.edu.au", "ukr.net", "unesp.br", "ucl.ac.uk", "ieee.org", "naver.com", "st-annes.ox.ac.uk", "stcatz.ox.ac.uk", "yahoo.fr", "ucm.es", "live.com" ], "y": [ 26540, 3769, 2614, 2109, 1132, 1056, 940, 762, 630, 584, 575, 458, 457, 300, 290, 277, 258, 247, 242, 235, 225, 218, 207, 204, 187, 184, 184, 172, 171, 163 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": 
"carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], 
"histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 
0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", 
"radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 email domains" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "set_top_n(30)\n", "# Sort once and reuse the same slice for both axes.\n", "top_domains = primary_emails.sort_values(by=['orcid'], ascending=False).head(TOP_N)\n", "data = [\n", "    go.Bar(\n", "        x=top_domains.index,\n", "        y=top_domains['orcid']\n", "    )\n", "]\n", "\n", "layout = go.Layout(\n", "    title='Top %s email domains' % TOP_N,\n", "    # TOP_RANGE pins the x-axis to the first TOP_N bars.\n", "    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Other emails" ] },
{ "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "def extract_email_domains(lst):\n", "    \"\"\"Return the domain part of every e-mail address in `lst`.\n", "\n", "    The domain is the text after the last '@', so an address whose quoted\n", "    local part contains '@' still yields its real domain. Entries with no\n", "    '@' or with nothing after it are skipped rather than raising IndexError,\n", "    so the result may be shorter than `lst`.\n", "\n", "    Args:\n", "        lst: iterable of e-mail address strings.\n", "\n", "    Returns:\n", "        list of domain strings, in input order.\n", "    \"\"\"\n", "    domains = []\n", "    for email in lst:\n", "        _, sep, domain = email.rpartition('@')\n", "        if sep and domain:\n", "            domains.append(domain)\n", "    return domains" ] },
{ "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Rows without a list of other e-mails (NaN) are passed through unchanged.\n", "df['other_email_domains'] = df['other_emails'].apply(lambda x: extract_email_domains(x) if isinstance(x, list) else x)" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...works_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domains
340000-0002-5774-8947111NaNNaNNaN[omah m. williams - duncan]NaNNaN...NaN2014-03-07t04:34:39.598z2019-05-21t17:08:12.202z00000NaN[gmail.com]
11990000-0003-2877-5492110aliasgharkhosroabadiNaNNaNNaNkhosroedc@yahoo.com...[scopus - elsevier]2018-01-19t13:40:29.874z2019-12-11t02:19:08.160z00011yahoo.com[medsab.ac.ir, gmail.com]
19950000-0001-8004-5054111angiolaorlandoNaNNaNNaNangiola.orlando@mib.infn.it...[angiola orlando, crossref]2015-08-31t09:12:02.349z2020-06-22t14:22:31.786z5920531mib.infn.it[ge.infn.it]
23230000-0003-3048-4504111apichatsaejioNaNNaNNaNNaN...[scopus - elsevier]2016-03-06t08:54:15.121z2020-08-28t08:31:15.790z20040NaN[eat.kmutnb.ac.th]
44610000-0001-9961-9732111chunfengyunNaNNaNNaNsallyycf@163.com...[multidisciplinary digital publishing institut...2016-11-22t07:55:23.863z2019-11-26t02:29:35.104z50901163.com[pku.edu.cn]
\n", "

5 rows × 26 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "34 0000-0002-5774-8947 1 1 1 \n", "1199 0000-0003-2877-5492 1 1 0 \n", "1995 0000-0001-8004-5054 1 1 1 \n", "2323 0000-0003-3048-4504 1 1 1 \n", "4461 0000-0001-9961-9732 1 1 1 \n", "\n", " given_names family_name biography other_names urls \\\n", "34 NaN NaN NaN [omah m. williams - duncan] NaN \n", "1199 aliasghar khosroabadi NaN NaN NaN \n", "1995 angiola orlando NaN NaN NaN \n", "2323 apichat saejio NaN NaN NaN \n", "4461 chunfeng yun NaN NaN NaN \n", "\n", " primary_email ... \\\n", "34 NaN ... \n", "1199 khosroedc@yahoo.com ... \n", "1995 angiola.orlando@mib.infn.it ... \n", "2323 NaN ... \n", "4461 sallyycf@163.com ... \n", "\n", " works_source \\\n", "34 NaN \n", "1199 [scopus - elsevier] \n", "1995 [angiola orlando, crossref] \n", "2323 [scopus - elsevier] \n", "4461 [multidisciplinary digital publishing institut... \n", "\n", " activation_date last_update_date n_doi n_arxiv n_pmc \\\n", "34 2014-03-07t04:34:39.598z 2019-05-21t17:08:12.202z 0 0 0 \n", "1199 2018-01-19t13:40:29.874z 2019-12-11t02:19:08.160z 0 0 0 \n", "1995 2015-08-31t09:12:02.349z 2020-06-22t14:22:31.786z 59 2 0 \n", "2323 2016-03-06t08:54:15.121z 2020-08-28t08:31:15.790z 2 0 0 \n", "4461 2016-11-22t07:55:23.863z 2019-11-26t02:29:35.104z 5 0 9 \n", "\n", " n_other_pids label primary_email_domain other_email_domains \n", "34 0 0 NaN [gmail.com] \n", "1199 1 1 yahoo.com [medsab.ac.ir, gmail.com] \n", "1995 53 1 mib.infn.it [ge.infn.it] \n", "2323 4 0 NaN [eat.kmutnb.ac.th] \n", "4461 0 1 163.com [pku.edu.cn] \n", "\n", "[5 rows x 26 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['other_email_domains'].notna()].head()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "df['n_emails'] = df['other_emails'].str.len()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { 
"application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "0000-0003-4171-3835", "0000-0001-6239-2968", "0000-0003-2151-4089", "0000-0003-2290-2817", "0000-0001-9084-3156", "0000-0001-6349-1044", "0000-0002-2085-1908", "0000-0003-4147-212x", "0000-0002-8565-194x", "0000-0002-7396-1561", "0000-0002-9821-8424", "0000-0003-4327-6827", "0000-0001-9311-0687", "0000-0003-0391-3430", "0000-0002-0776-9547", "0000-0003-2657-8225", "0000-0001-5548-8259", "0000-0003-0671-1543", "0000-0003-1502-3910", "0000-0003-4685-5621", "0000-0002-3165-132x", "0000-0001-8420-9204", "0000-0002-1929-6054", "0000-0002-8390-8238", "0000-0002-9599-6909", "0000-0002-5341-6531", "0000-0003-4499-7300", "0000-0002-1615-8633", "0000-0002-6206-4638", "0000-0003-3405-355x" ], "y": [ 12, 9, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 
0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], 
"type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, 
"arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": 
"white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 ORCiD by email" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
# %%
set_top_n(30)

# Sort once and reuse the result for both axes (the original sorted the whole
# frame twice); only the two plotted columns are carried through the sort.
top_by_emails = (
    df[['orcid', 'n_emails']]
    .sort_values('n_emails', ascending=False)
    .head(TOP_N)
)

data = [
    go.Bar(
        x=top_by_emails['orcid'],
        y=top_by_emails['n_emails']
    )
]

layout = go.Layout(
    title='Top %s ORCiD by email' % TOP_N,
    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)

# %%
# Explode to one row per (ORCID, secondary e-mail domain) pair, then count the
# rows per domain and order the domains from most to least frequent.
# A profile listing the same domain twice contributes two rows to that domain.
grouped_other_emails = (
    df[['orcid', 'other_email_domains']]
    .explode('other_email_domains')
    .reset_index(drop=True)
    .groupby('other_email_domains')
    .count()
    .sort_values('orcid', ascending=False)
)
"#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, 
"#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { 
"colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" 
}, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 other email domains" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
# %%
set_top_n(30)

# Sort once and reuse the result for both axes (the original repeated the same
# sort for the index and for the counts). The sort is kept here so the cell
# does not depend on the ordering produced upstream.
top_other_domains = (
    grouped_other_emails
    .sort_values(by=['orcid'], ascending=False)
    .head(TOP_N)
)

data = [
    go.Bar(
        x=top_other_domains.index,      # e-mail domain
        y=top_other_domains['orcid']    # number of rows counted for that domain
    )
]

layout = go.Layout(
    title='Top %s other email domains' % TOP_N,
    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)
)
fig = go.Figure(data=data, layout=layout)
plotly.offline.iplot(fig)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...activation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emails
340000-0002-5774-8947111NaNNaNNaN[omah m. williams - duncan]NaNNaN...2014-03-07t04:34:39.598z2019-05-21t17:08:12.202z00000NaN[gmail.com]1.0
23230000-0003-3048-4504111apichatsaejioNaNNaNNaNNaN...2016-03-06t08:54:15.121z2020-08-28t08:31:15.790z20040NaN[eat.kmutnb.ac.th]1.0
76220000-0002-5612-7444111friederike m.hesseNaNNaN[[midwifery care - milla hebammenpraxis, http:...NaN...2017-06-10t07:45:11.387z2017-06-10t07:55:03.455z00000NaN[gmail.com, dghwi.de]2.0
79560000-0002-8943-0538111geosunnyNaNNaNNaNNaN...2019-11-30t14:08:11.221z2020-05-15t09:06:25.637z10001NaN[students.cutn.ac.in]1.0
105080000-0002-4022-0580111jean carlosda silva gomesNaNNaN[[currículo lattes, http://lattes.cnpq.br/0026...NaN...2017-05-26t19:09:33.432z2020-06-02t00:23:14.020z20021NaN[letras.ufrj.br]1.0
..................................................................
109150020000-0002-3715-3866111joannakorybut-orlowskaNaN[joanna gołębiewska]NaNNaN...2017-04-27t10:08:48.102z2020-12-08t09:44:59.088z60000NaN[gmail.com]1.0
109153050000-0003-1925-0141111marcoferrettiNaNNaNNaNNaN...2015-02-23t10:29:00.543z2020-11-30t21:58:07.439z70091NaN[itabc.cnr.it]1.0
109154950000-0001-5526-3017111nadiayacoubiNaNNaNNaNNaN...2015-03-10t16:45:31.974z2020-12-11t00:00:01.060z30001NaN[evonik.com]1.0
109158200000-0002-9902-7953111s m mahmudulhasanNaNNaNNaNNaN...2018-01-26t02:18:25.551z2020-11-24t05:37:24.167z70271NaN[gmail.com]1.0
109163060000-0002-5126-5127111andonisneophytouNaNNaNNaNNaN...2017-03-30t17:08:15.383z2020-12-09t16:16:50.762z20030NaN[ucy.ac.cy]1.0
\n", "

19692 rows × 27 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "34 0000-0002-5774-8947 1 1 \n", "2323 0000-0003-3048-4504 1 1 \n", "7622 0000-0002-5612-7444 1 1 \n", "7956 0000-0002-8943-0538 1 1 \n", "10508 0000-0002-4022-0580 1 1 \n", "... ... ... ... \n", "10915002 0000-0002-3715-3866 1 1 \n", "10915305 0000-0003-1925-0141 1 1 \n", "10915495 0000-0001-5526-3017 1 1 \n", "10915820 0000-0002-9902-7953 1 1 \n", "10916306 0000-0002-5126-5127 1 1 \n", "\n", " verified_primary_email given_names family_name biography \\\n", "34 1 NaN NaN NaN \n", "2323 1 apichat saejio NaN \n", "7622 1 friederike m. hesse NaN \n", "7956 1 geo sunny NaN \n", "10508 1 jean carlos da silva gomes NaN \n", "... ... ... ... ... \n", "10915002 1 joanna korybut-orlowska NaN \n", "10915305 1 marco ferretti NaN \n", "10915495 1 nadia yacoubi NaN \n", "10915820 1 s m mahmudul hasan NaN \n", "10916306 1 andonis neophytou NaN \n", "\n", " other_names \\\n", "34 [omah m. williams - duncan] \n", "2323 NaN \n", "7622 NaN \n", "7956 NaN \n", "10508 NaN \n", "... ... \n", "10915002 [joanna gołębiewska] \n", "10915305 NaN \n", "10915495 NaN \n", "10915820 NaN \n", "10916306 NaN \n", "\n", " urls primary_email \\\n", "34 NaN NaN \n", "2323 NaN NaN \n", "7622 [[midwifery care - milla hebammenpraxis, http:... NaN \n", "7956 NaN NaN \n", "10508 [[currículo lattes, http://lattes.cnpq.br/0026... NaN \n", "... ... ... \n", "10915002 NaN NaN \n", "10915305 NaN NaN \n", "10915495 NaN NaN \n", "10915820 NaN NaN \n", "10916306 NaN NaN \n", "\n", " ... activation_date last_update_date n_doi \\\n", "34 ... 2014-03-07t04:34:39.598z 2019-05-21t17:08:12.202z 0 \n", "2323 ... 2016-03-06t08:54:15.121z 2020-08-28t08:31:15.790z 2 \n", "7622 ... 2017-06-10t07:45:11.387z 2017-06-10t07:55:03.455z 0 \n", "7956 ... 2019-11-30t14:08:11.221z 2020-05-15t09:06:25.637z 1 \n", "10508 ... 2017-05-26t19:09:33.432z 2020-06-02t00:23:14.020z 2 \n", "... ... ... ... ... \n", "10915002 ... 
def extract_url_domains(lst):
    """Map a profile's URL records to their registered domains.

    Args:
        lst: iterable of URL records. Each record is expected to be a
            two-item sequence: record[0] is a string describing the URL,
            record[1] is the URL itself.

    Returns:
        A list with exactly one entry per input record, in input order.
        Each entry is the ``registered_domain`` reported by tldextract for
        the record's URL. Records that are malformed (not a list/tuple,
        fewer than two items, or a non-string URL such as None/NaN) yield
        ``''`` instead of raising, so a single bad record cannot abort a
        column-wide ``apply`` and ``len(result) == len(lst)`` always holds.
    """
    domains = []
    for record in lst:
        # Guard the positional access: record[1] only exists on well-formed pairs.
        has_url = isinstance(record, (list, tuple)) and len(record) > 1
        url = record[1] if has_url else None
        if not isinstance(url, str):
            # Keep the slot so the output stays aligned with the input; '' is
            # also what tldextract reports when it finds no registrable domain.
            domains.append('')
            continue
        domains.append(tldextract.extract(url).registered_domain)
    return domains
# Derive the list of registered domains for every profile that has a list of
# URL records; any non-list cell (e.g. the NaN shown for profiles without
# URLs) is passed through unchanged.
df['url_domains'] = df['urls'].apply(
    lambda urls: extract_url_domains(urls) if isinstance(urls, list) else urls
)
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...last_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emailsurl_domains
90000-0001-8718-0056111NaNNaNNaN[飛資得][[link1, http://orcid.flysheetmed.info], [ntu ...ericlin.flysheet@gmail.com...2019-10-11t17:51:12.473z00061gmail.comNaNNaN[flysheetmed.info, ntu.edu.tw]
410000-0002-7845-4016111NaNNaNNaNNaN[[publication profile, http://publications.lib...NaN...2016-06-06t15:29:36.952z00000NaNNaNNaN[chalmers.se]
590000-0003-0967-6157111NaNNaNNaN[徐興慶][[ntu researcher profile, http://ah.ntu.edu.tw...NaN...2017-03-10t07:30:04.778z120041NaNNaNNaN[ntu.edu.tw, ntu.edu.tw]
1490000-0002-8015-3781111alejandroossorioNaNNaN[[web de la universidad carlos iii de madrid, ...aossorio@di.uc3m.es...2019-07-04t08:47:12.005z00000di.uc3m.esNaNNaN[uc3m.es]
1550000-0003-3444-936x111alessandracaravalearcheologa, con laurea in metodologia e tecnic...NaN[[isma- cnr, http://www.isma.cnr.it/?page_id=1...NaN...2020-05-14t15:54:38.235z700141NaNNaNNaN[cnr.it]
\n", "

5 rows × 28 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "9 0000-0001-8718-0056 1 1 1 \n", "41 0000-0002-7845-4016 1 1 1 \n", "59 0000-0003-0967-6157 1 1 1 \n", "149 0000-0002-8015-3781 1 1 1 \n", "155 0000-0003-3444-936x 1 1 1 \n", "\n", " given_names family_name \\\n", "9 NaN NaN \n", "41 NaN NaN \n", "59 NaN NaN \n", "149 alejandro ossorio \n", "155 alessandra caravale \n", "\n", " biography other_names \\\n", "9 NaN [飛資得] \n", "41 NaN NaN \n", "59 NaN [徐興慶] \n", "149 NaN NaN \n", "155 archeologa, con laurea in metodologia e tecnic... NaN \n", "\n", " urls \\\n", "9 [[link1, http://orcid.flysheetmed.info], [ntu ... \n", "41 [[publication profile, http://publications.lib... \n", "59 [[ntu researcher profile, http://ah.ntu.edu.tw... \n", "149 [[web de la universidad carlos iii de madrid, ... \n", "155 [[isma- cnr, http://www.isma.cnr.it/?page_id=1... \n", "\n", " primary_email ... last_update_date n_doi n_arxiv \\\n", "9 ericlin.flysheet@gmail.com ... 2019-10-11t17:51:12.473z 0 0 \n", "41 NaN ... 2016-06-06t15:29:36.952z 0 0 \n", "59 NaN ... 2017-03-10t07:30:04.778z 12 0 \n", "149 aossorio@di.uc3m.es ... 2019-07-04t08:47:12.005z 0 0 \n", "155 NaN ... 
# %%
# Spot-check: first rows for which URL domains were extracted.
# NOTE(review): the rendered rows include names and e-mail addresses; clear the
# cell output before sharing the notebook.
df.loc[df['url_domains'].notna()].head()

# %%
# Length of each `url_domains` entry; profiles without URLs stay NaN.
df['n_urls'] = df['url_domains'].str.len()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidn_urls
2573750000-0002-1234-835x219.0
36300670000-0001-7478-4539174.0
51960890000-0002-7392-3792169.0
106960590000-0002-6938-9638152.0
68689320000-0002-5710-4041114.0
.........
109165690000-0001-5692-7639NaN
109165700000-0003-1539-0999NaN
109165710000-0003-2858-5509NaN
109165720000-0003-2438-9500NaN
109165730000-0003-4119-4772NaN
\n", "

10916574 rows × 2 columns

\n", "
" ], "text/plain": [ " orcid n_urls\n", "257375 0000-0002-1234-835x 219.0\n", "3630067 0000-0001-7478-4539 174.0\n", "5196089 0000-0002-7392-3792 169.0\n", "10696059 0000-0002-6938-9638 152.0\n", "6868932 0000-0002-5710-4041 114.0\n", "... ... ...\n", "10916569 0000-0001-5692-7639 NaN\n", "10916570 0000-0003-1539-0999 NaN\n", "10916571 0000-0003-2858-5509 NaN\n", "10916572 0000-0003-2438-9500 NaN\n", "10916573 0000-0003-4119-4772 NaN\n", "\n", "[10916574 rows x 2 columns]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values('n_urls', ascending=False)[['orcid', 'n_urls']]" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "0000-0002-1234-835x", "0000-0001-7478-4539", "0000-0002-7392-3792", "0000-0002-6938-9638", "0000-0002-5710-4041", "0000-0003-2450-090x", "0000-0002-3920-7389", "0000-0001-5384-0001", "0000-0002-6689-4129", "0000-0002-4621-5571", "0000-0002-7754-8889", "0000-0001-9131-1266", "0000-0002-5250-1144", "0000-0002-9025-8632", "0000-0002-7456-3848", "0000-0003-0176-1293", "0000-0003-0321-7339", "0000-0002-8493-0402", "0000-0002-9965-2425", "0000-0001-8873-6677", "0000-0002-3997-5070", "0000-0002-1856-6905", "0000-0002-4316-1467", "0000-0002-4062-3603", "0000-0001-5880-7091", "0000-0003-0594-2462", "0000-0003-1524-6268", "0000-0002-0752-7513", "0000-0003-2593-7134", "0000-0002-1298-5252", "0000-0003-2383-8386", "0000-0003-1761-3842", "0000-0003-3546-2312", "0000-0002-2886-9248", "0000-0003-4948-9268", "0000-0003-2183-8112", "0000-0002-1929-6054", "0000-0003-2407-3557", "0000-0001-7133-6896", "0000-0002-9276-6921", "0000-0002-4305-4215", "0000-0003-1484-6958", "0000-0002-7568-3403", "0000-0002-4004-6666", "0000-0003-0796-0234", "0000-0002-8208-0897", "0000-0003-4993-5555", 
"0000-0002-8116-9611", "0000-0003-0930-6121", "0000-0002-9071-5450", "0000-0002-8122-879x", "0000-0002-3277-9659", "0000-0001-9559-1103", "0000-0003-2862-6315", "0000-0002-2000-8339", "0000-0001-5300-4601", "0000-0002-6547-0172", "0000-0003-4808-6619", "0000-0002-5139-2660", "0000-0002-6254-8683", "0000-0002-0971-9375", "0000-0003-3933-0229", "0000-0003-1585-1134", "0000-0003-0694-1154", "0000-0002-4659-5391", "0000-0001-6461-2573", "0000-0001-6783-2037", "0000-0003-4501-3756", "0000-0002-2916-2893", "0000-0001-5549-6822", "0000-0003-4326-9336", "0000-0001-8978-4830", "0000-0002-8940-3177", "0000-0001-8096-4333", "0000-0002-6680-1703", "0000-0002-5946-1595", "0000-0002-8593-9257", "0000-0002-5196-4905", "0000-0002-7653-4899", "0000-0003-1904-4188", "0000-0001-6921-0426", "0000-0001-8808-4867", "0000-0003-1815-1993", "0000-0001-8644-2114", "0000-0003-1675-2840", "0000-0002-7843-8497", "0000-0001-7784-0583", "0000-0001-8986-2528", "0000-0002-5265-6074", "0000-0001-7550-5802", "0000-0003-0907-9870", "0000-0002-0696-8560", "0000-0002-3334-9386", "0000-0002-7179-6953", "0000-0001-6979-4273", "0000-0001-9102-8639", "0000-0002-8797-6502", "0000-0001-9119-5955", "0000-0001-7608-9433", "0000-0002-5985-9114" ], "y": [ 219, 174, 169, 152, 114, 114, 111, 104, 104, 90, 83, 83, 81, 81, 80, 80, 80, 76, 73, 72, 71, 70, 69, 69, 68, 68, 68, 68, 67, 67, 66, 66, 65, 64, 61, 61, 61, 59, 57, 57, 57, 57, 57, 57, 57, 56, 55, 55, 55, 55, 50, 50, 50, 49, 49, 48, 48, 48, 48, 48, 47, 47, 46, 46, 46, 45, 45, 45, 45, 44, 43, 43, 43, 43, 42, 42, 42, 41, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37, 37, 36, 36, 36, 36 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", 
"gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 
0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, 
"hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 100 ORCID with URLs" }, "xaxis": { "range": [ -0.5, 99.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "set_top_n(100)\n", "data = [\n", " go.Bar(\n", " x=df.sort_values(by=['n_urls'], ascending=False)['orcid'][:TOP_N],\n", " y=df.sort_values(by=['n_urls'], ascending=False)['n_urls'][:TOP_N]\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Top %s ORCID with URLs' % TOP_N,\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "grouped_urls = df[['orcid', 'url_domains']]\\\n", " .explode('url_domains')\\\n", " .reset_index(drop=True)\\\n", " .groupby('url_domains')\\\n", " .count()\\\n", " .sort_values('orcid', ascending=False)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "linkedin.com", "researchgate.net", "google.com", "cnpq.br", "academia.edu", "twitter.com", "facebook.com", "publons.com", "wordpress.com", "mendeley.com", "instagram.com", "github.io", "google.com.ua", "blogspot.com", "github.com", "google.es", "helsinki.fi", "unirioja.es", "youtube.com", "wixsite.com", "ku.dk", "scopus.com", "", "weebly.com", "us.es", "kth.se", "cityu.edu.hk", "kcl.ac.uk", "au.dk", "man.ac.uk" ], "y": [ 77558, 67357, 44397, 24439, 21054, 18771, 15121, 10622, 8996, 6978, 5881, 5479, 5335, 5240, 5199, 5134, 4711, 4572, 4396, 4120, 3756, 3558, 3494, 3115, 3034, 2952, 2793, 2720, 2717, 2693 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], 
"carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, 
"#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": 
"scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, 
"showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 URL domains" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "set_top_n(30)\n", "data = [\n", " go.Bar(\n", " x=grouped_urls.sort_values(by=['orcid'], ascending=False).index[:TOP_N],\n", " y=grouped_urls.sort_values(by=['orcid'], ascending=False)['orcid'][:TOP_N]\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Top %s URL domains' % TOP_N,\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
3824970000-0002-9025-8632111buycannabisdispensarywe procure and deliver premium cannabis strain...[we procure and deliver premium cannabis strai...[[find your cannabis & marijuana dispensary , ...NaN...00000NaNNaNNaN[goowonderland.com, goowonderland.com, goowond...81.0
9118110000-0002-4062-3603111juan de diosbeltrán mancillajuan de dios beltrán mancilla (*) filósofo aut...[juan de dios beltrán mancilla, filósofo autod...[[01.- juan de dios beltrán mancilla. teoría o...NaN...00070NaNNaNNaN[yumpu.com, ijopm.org, google.com, blogspot.co...69.0
11361290000-0002-1929-6054111franklin américocanaza choquedocente-investigador social. maestrando en der...[franklin américo canaza-choque , franklin a. ...[[consejo nacional de ciencia, tecnología e in...leo_123fa@hotmail.com...2900331hotmail.com[gmail.com, gmail.com, hotmail.com, baldwin.ed...5.0[concytec.gob.pe, redalyc.org, redalyc.org, un...61.0
31026860000-0003-2593-7134111aanjaelaniall my papers can be downloaded from portal:re...[jaelani, a., jaelani, aan][[microsoft academic research, https://academi...aan_jaelani@syekhnurjati.ac.id...88001931syekhnurjati.ac.id[gmail.com]1.0[microsoft.com, twitter.com, academia.edu, aca...67.0
68689320000-0002-5710-4041111ryszardromaniukprofessor of electronics and communications en...[r.romaniuk, r.s.romaniuk, ryszard romaniuk, r...[[scholar google, http://scholar.google.pl/cit...rrom@ise.pw.edu.pl...122125017421ise.pw.edu.pl[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]3.0[google.pl, publons.com, scopus.com, mendeley....114.0
80889870000-0002-9965-2425111jaroslawspychalajaroslaw spychala has received a doctoral degr...[jaroslaw jozef spychala][[resume, http://www.biowebspin.com/wp-content...NaN...1500291NaNNaNNaN[biowebspin.com, biowebspin.com, google.com, l...73.0
86583550000-0002-3920-7389111а.гусевsurname, name gusev alexander leonidovichdate...[alexander l. gusev , alexander leonidovich gu...[[a.l. gusev alternative energy and ecology, ...NaN...3700211NaNNaNNaN[youtube.com, isjaee.com, researchgate.net, re...111.0
87788640000-0002-3997-5070111dr. parameshacharib ddr. parameshachari b dacm distinguished speake...[dr. parameshachari b d][[gsssietw,mysuru, http://geethashishu.in/], [...NaN...4700481NaNNaNNaN[geethashishu.in, geethashishu.in, acm.org, go...71.0
99801640000-0003-4948-9268111gustavoduperrégustavo norberto duperré graduated in arts and...[gustavo norberto duperré, duperré, g. n., gus...[[gis in cultural heritage - icomos românia, h...gustavo.duperre@usal.edu.ar...1300340usal.edu.arNaNNaN[icomos.ro, unirioja.es, unirioja.es, unc.edu....61.0
100245010000-0003-2407-3557111abdulazizabdul aziz was born on may 25, 1973, in brebes...[abdul aziz, aziz, abdul, aziz, a., aziz, abd,...[[google scholar, https://scholar.google.com/c...NaN...1900771NaNNaNNaN[google.com, syekhnurjati.ac.id, orcid.org, bl...59.0
100911650000-0003-2183-8112111pelayo munhozoleapós-doutorado em gestão ambiental pela univers...[ munhoz, pelayo olea, olea, pelayo, olea, p...[[currículo lattes, http://lattes.cnpq.br/6209...NaN...797015821NaNNaNNaN[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...61.0
105232050000-0003-2450-090x111eduardbabulakprofessor eduard babulak is accomplished inter...[professor eduard babulak][[honorary chair, chief mentor & senior adviso...NaN...199011741NaNNaNNaN[worldassessmentcouncil.org, spseke.sk, bcs.or...114.0
106960590000-0002-6938-9638111adolfocatral sanabriamy education is in computer science, mathemati...NaN[[researchgate adolfo catral , https://www.res...NaN...202200161NaNNaNNaN[researchgate.net, youtube.com, linkedin.com, ...152.0
\n", "

13 rows × 29 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "382497 0000-0002-9025-8632 1 1 \n", "911811 0000-0002-4062-3603 1 1 \n", "1136129 0000-0002-1929-6054 1 1 \n", "3102686 0000-0003-2593-7134 1 1 \n", "6868932 0000-0002-5710-4041 1 1 \n", "8088987 0000-0002-9965-2425 1 1 \n", "8658355 0000-0002-3920-7389 1 1 \n", "8778864 0000-0002-3997-5070 1 1 \n", "9980164 0000-0003-4948-9268 1 1 \n", "10024501 0000-0003-2407-3557 1 1 \n", "10091165 0000-0003-2183-8112 1 1 \n", "10523205 0000-0003-2450-090x 1 1 \n", "10696059 0000-0002-6938-9638 1 1 \n", "\n", " verified_primary_email given_names family_name \\\n", "382497 1 buycannabis dispensary \n", "911811 1 juan de dios beltrán mancilla \n", "1136129 1 franklin américo canaza choque \n", "3102686 1 aan jaelani \n", "6868932 1 ryszard romaniuk \n", "8088987 1 jaroslaw spychala \n", "8658355 1 а. гусев \n", "8778864 1 dr. parameshachari b d \n", "9980164 1 gustavo duperré \n", "10024501 1 abdul aziz \n", "10091165 1 pelayo munhoz olea \n", "10523205 1 eduard babulak \n", "10696059 1 adolfo catral sanabria \n", "\n", " biography \\\n", "382497 we procure and deliver premium cannabis strain... \n", "911811 juan de dios beltrán mancilla (*) filósofo aut... \n", "1136129 docente-investigador social. maestrando en der... \n", "3102686 all my papers can be downloaded from portal:re... \n", "6868932 professor of electronics and communications en... \n", "8088987 jaroslaw spychala has received a doctoral degr... \n", "8658355 surname, name gusev alexander leonidovichdate... \n", "8778864 dr. parameshachari b dacm distinguished speake... \n", "9980164 gustavo norberto duperré graduated in arts and... \n", "10024501 abdul aziz was born on may 25, 1973, in brebes... \n", "10091165 pós-doutorado em gestão ambiental pela univers... \n", "10523205 professor eduard babulak is accomplished inter... \n", "10696059 my education is in computer science, mathemati... 
\n", "\n", " other_names \\\n", "382497 [we procure and deliver premium cannabis strai... \n", "911811 [juan de dios beltrán mancilla, filósofo autod... \n", "1136129 [franklin américo canaza-choque , franklin a. ... \n", "3102686 [jaelani, a., jaelani, aan] \n", "6868932 [r.romaniuk, r.s.romaniuk, ryszard romaniuk, r... \n", "8088987 [jaroslaw jozef spychala] \n", "8658355 [alexander l. gusev , alexander leonidovich gu... \n", "8778864 [dr. parameshachari b d] \n", "9980164 [gustavo norberto duperré, duperré, g. n., gus... \n", "10024501 [abdul aziz, aziz, abdul, aziz, a., aziz, abd,... \n", "10091165 [ munhoz, pelayo olea, olea, pelayo, olea, p... \n", "10523205 [professor eduard babulak] \n", "10696059 NaN \n", "\n", " urls \\\n", "382497 [[find your cannabis & marijuana dispensary , ... \n", "911811 [[01.- juan de dios beltrán mancilla. teoría o... \n", "1136129 [[consejo nacional de ciencia, tecnología e in... \n", "3102686 [[microsoft academic research, https://academi... \n", "6868932 [[scholar google, http://scholar.google.pl/cit... \n", "8088987 [[resume, http://www.biowebspin.com/wp-content... \n", "8658355 [[a.l. gusev alternative energy and ecology, ... \n", "8778864 [[gsssietw,mysuru, http://geethashishu.in/], [... \n", "9980164 [[gis in cultural heritage - icomos românia, h... \n", "10024501 [[google scholar, https://scholar.google.com/c... \n", "10091165 [[currículo lattes, http://lattes.cnpq.br/6209... \n", "10523205 [[honorary chair, chief mentor & senior adviso... \n", "10696059 [[researchgate adolfo catral , https://www.res... \n", "\n", " primary_email ... n_doi n_arxiv n_pmc \\\n", "382497 NaN ... 0 0 0 \n", "911811 NaN ... 0 0 0 \n", "1136129 leo_123fa@hotmail.com ... 29 0 0 \n", "3102686 aan_jaelani@syekhnurjati.ac.id ... 88 0 0 \n", "6868932 rrom@ise.pw.edu.pl ... 1221 25 0 \n", "8088987 NaN ... 15 0 0 \n", "8658355 NaN ... 37 0 0 \n", "8778864 NaN ... 47 0 0 \n", "9980164 gustavo.duperre@usal.edu.ar ... 13 0 0 \n", "10024501 NaN ... 
19 0 0 \n", "10091165 NaN ... 797 0 1 \n", "10523205 NaN ... 199 0 1 \n", "10696059 NaN ... 2022 0 0 \n", "\n", " n_other_pids label primary_email_domain \\\n", "382497 0 0 NaN \n", "911811 7 0 NaN \n", "1136129 33 1 hotmail.com \n", "3102686 193 1 syekhnurjati.ac.id \n", "6868932 1742 1 ise.pw.edu.pl \n", "8088987 29 1 NaN \n", "8658355 21 1 NaN \n", "8778864 48 1 NaN \n", "9980164 34 0 usal.edu.ar \n", "10024501 77 1 NaN \n", "10091165 582 1 NaN \n", "10523205 174 1 NaN \n", "10696059 16 1 NaN \n", "\n", " other_email_domains n_emails \\\n", "382497 NaN NaN \n", "911811 NaN NaN \n", "1136129 [gmail.com, gmail.com, hotmail.com, baldwin.ed... 5.0 \n", "3102686 [gmail.com] 1.0 \n", "6868932 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] 3.0 \n", "8088987 NaN NaN \n", "8658355 NaN NaN \n", "8778864 NaN NaN \n", "9980164 NaN NaN \n", "10024501 NaN NaN \n", "10091165 NaN NaN \n", "10523205 NaN NaN \n", "10696059 NaN NaN \n", "\n", " url_domains n_urls \n", "382497 [goowonderland.com, goowonderland.com, goowond... 81.0 \n", "911811 [yumpu.com, ijopm.org, google.com, blogspot.co... 69.0 \n", "1136129 [concytec.gob.pe, redalyc.org, redalyc.org, un... 61.0 \n", "3102686 [microsoft.com, twitter.com, academia.edu, aca... 67.0 \n", "6868932 [google.pl, publons.com, scopus.com, mendeley.... 114.0 \n", "8088987 [biowebspin.com, biowebspin.com, google.com, l... 73.0 \n", "8658355 [youtube.com, isjaee.com, researchgate.net, re... 111.0 \n", "8778864 [geethashishu.in, geethashishu.in, acm.org, go... 71.0 \n", "9980164 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... 61.0 \n", "10024501 [google.com, syekhnurjati.ac.id, orcid.org, bl... 59.0 \n", "10091165 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... 61.0 \n", "10523205 [worldassessmentcouncil.org, spseke.sk, bcs.or... 114.0 \n", "10696059 [researchgate.net, youtube.com, linkedin.com, ... 
152.0 \n", "\n", "[13 rows x 29 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df['url_domains'].str.len() > 50) & (df['n_works'] > 0)]" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
976660000-0002-7843-8497111davibarbosapesquisador na área sociojurídica, professor, ...[professor davi barbosa delmont][[plataforma de cursos ideia criativa, https:/...NaN...00000NaNNaNNaN[eadplataforma.com, facebook.com, youtube.com,...39.0
2006700000-0003-1554-1531111katarzynaochmankatarzyna ochman [kataˈʐɨna ˈɔxman] is assista...[[kataˈʐɨna ˈɔxman], catharina ochman, cathari...[[researchgate, https://www.researchgate.net/p...NaN...10001NaNNaNNaN[researchgate.net, academia.edu, facebook.com,...11.0
2103250000-0003-3080-4643111grahamdawsonscience and engineering faculty (sef) libraria...[ graham colin dawson, g.c. dawson][[qut home page, https://www.library.qut.edu.a...g.dawson@qut.edu.au...00061qut.edu.auNaNNaN[qut.edu.au, qut.edu.au, google.com.au, resear...11.0
2189470000-0003-3193-030x111juan pablowolff mejiaaspirante a maestría en derecho y negocios int...[juan pablo wolff, pablo wolff mejia, juan p. ...[[twitter, https://twitter.com/pablomejiam], [...juanpmejia@ulasallista.edu.co...00001ulasallista.edu.coNaNNaN[twitter.com, youtube.com, google.com, linkedi...11.0
2619740000-0002-5341-6531111trenthammondmr trent hammond is an honorary research fello...[trent ernest hammond (t.e.hammond)][[academic support masters, http://trenthammon...trent.hammond@academicsupportmasters.com.au...10011academicsupportmasters.com.au[health.nsw.gov.au, csu.edu.au, sociologist.co...5.0[wix.com, academia.edu, researchgate.net, rese...12.0
..................................................................
104057380000-0002-3374-5709111guillermoortizmédico, internista, neumólogo, intensivista, e...[guillermo ortiz-ruiz][[elsevier, https://www.elsevier.com/], [asoci...NaN...6200880NaNNaNNaN[elsevier.com, amci.org.co, springer.com, revi...12.0
104722640000-0001-7228-5680111textprotocolNaNNaN[[about, https://about.me/textprotocol], [gith...NaN...00000NaNNaNNaN[about.me, github.com, gitlab.com, gravatar.co...12.0
107859610000-0002-3064-0194111leonardo fernandocruz bassoNaNNaN[[papers-1, https://www.researchgate.net/profi...leonardofernando.basso@mackenzie.br...50001mackenzie.br[mackenzie.br]1.0[researchgate.net, ssrn.com, cnpq.br, google.c...17.0
108456450000-0003-1047-4229111bayusaktibayu purbha saktisaya adalah bayu purbha sakti...[bayu purbha sakti][[osf, http://osf.io/qe2ug], [inarxiv, https:/...NaN...00001NaNNaNNaN[osf.io, osf.io, academia.edu, mendeley.com, f...12.0
108960590000-0003-4836-7074111karla haydeeortiz palafoxkarla haydee ortíz palafoxmiembro del sistema ...[karla palafox][[opinión día del maestro, http://www.cronicaj...NaN...00021NaNNaNNaN[cronicajalisco.com, youtube.com, tlaquepaque....22.0
\n", "

141 rows × 29 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "97666 0000-0002-7843-8497 1 1 \n", "200670 0000-0003-1554-1531 1 1 \n", "210325 0000-0003-3080-4643 1 1 \n", "218947 0000-0003-3193-030x 1 1 \n", "261974 0000-0002-5341-6531 1 1 \n", "... ... ... ... \n", "10405738 0000-0002-3374-5709 1 1 \n", "10472264 0000-0001-7228-5680 1 1 \n", "10785961 0000-0002-3064-0194 1 1 \n", "10845645 0000-0003-1047-4229 1 1 \n", "10896059 0000-0003-4836-7074 1 1 \n", "\n", " verified_primary_email given_names family_name \\\n", "97666 1 davi barbosa \n", "200670 1 katarzyna ochman \n", "210325 1 graham dawson \n", "218947 1 juan pablo wolff mejia \n", "261974 1 trent hammond \n", "... ... ... ... \n", "10405738 1 guillermo ortiz \n", "10472264 1 text protocol \n", "10785961 1 leonardo fernando cruz basso \n", "10845645 1 bayu sakti \n", "10896059 1 karla haydee ortiz palafox \n", "\n", " biography \\\n", "97666 pesquisador na área sociojurídica, professor, ... \n", "200670 katarzyna ochman [kataˈʐɨna ˈɔxman] is assista... \n", "210325 science and engineering faculty (sef) libraria... \n", "218947 aspirante a maestría en derecho y negocios int... \n", "261974 mr trent hammond is an honorary research fello... \n", "... ... \n", "10405738 médico, internista, neumólogo, intensivista, e... \n", "10472264 NaN \n", "10785961 NaN \n", "10845645 bayu purbha saktisaya adalah bayu purbha sakti... \n", "10896059 karla haydee ortíz palafoxmiembro del sistema ... \n", "\n", " other_names \\\n", "97666 [professor davi barbosa delmont] \n", "200670 [[kataˈʐɨna ˈɔxman], catharina ochman, cathari... \n", "210325 [ graham colin dawson, g.c. dawson] \n", "218947 [juan pablo wolff, pablo wolff mejia, juan p. ... \n", "261974 [trent ernest hammond (t.e.hammond)] \n", "... ... \n", "10405738 [guillermo ortiz-ruiz] \n", "10472264 NaN \n", "10785961 NaN \n", "10845645 [bayu purbha sakti] \n", "10896059 [karla palafox] \n", "\n", " urls \\\n", "97666 [[plataforma de cursos ideia criativa, https:/... 
\n", "200670 [[researchgate, https://www.researchgate.net/p... \n", "210325 [[qut home page, https://www.library.qut.edu.a... \n", "218947 [[twitter, https://twitter.com/pablomejiam], [... \n", "261974 [[academic support masters, http://trenthammon... \n", "... ... \n", "10405738 [[elsevier, https://www.elsevier.com/], [asoci... \n", "10472264 [[about, https://about.me/textprotocol], [gith... \n", "10785961 [[papers-1, https://www.researchgate.net/profi... \n", "10845645 [[osf, http://osf.io/qe2ug], [inarxiv, https:/... \n", "10896059 [[opinión día del maestro, http://www.cronicaj... \n", "\n", " primary_email ... n_doi n_arxiv \\\n", "97666 NaN ... 0 0 \n", "200670 NaN ... 1 0 \n", "210325 g.dawson@qut.edu.au ... 0 0 \n", "218947 juanpmejia@ulasallista.edu.co ... 0 0 \n", "261974 trent.hammond@academicsupportmasters.com.au ... 1 0 \n", "... ... ... ... ... \n", "10405738 NaN ... 62 0 \n", "10472264 NaN ... 0 0 \n", "10785961 leonardofernando.basso@mackenzie.br ... 5 0 \n", "10845645 NaN ... 0 0 \n", "10896059 NaN ... 0 0 \n", "\n", " n_pmc n_other_pids label primary_email_domain \\\n", "97666 0 0 0 NaN \n", "200670 0 0 1 NaN \n", "210325 0 6 1 qut.edu.au \n", "218947 0 0 1 ulasallista.edu.co \n", "261974 0 1 1 academicsupportmasters.com.au \n", "... ... ... ... ... \n", "10405738 0 88 0 NaN \n", "10472264 0 0 0 NaN \n", "10785961 0 0 1 mackenzie.br \n", "10845645 0 0 1 NaN \n", "10896059 0 2 1 NaN \n", "\n", " other_email_domains n_emails \\\n", "97666 NaN NaN \n", "200670 NaN NaN \n", "210325 NaN NaN \n", "218947 NaN NaN \n", "261974 [health.nsw.gov.au, csu.edu.au, sociologist.co... 5.0 \n", "... ... ... \n", "10405738 NaN NaN \n", "10472264 NaN NaN \n", "10785961 [mackenzie.br] 1.0 \n", "10845645 NaN NaN \n", "10896059 NaN NaN \n", "\n", " url_domains n_urls \n", "97666 [eadplataforma.com, facebook.com, youtube.com,... 39.0 \n", "200670 [researchgate.net, academia.edu, facebook.com,... 11.0 \n", "210325 [qut.edu.au, qut.edu.au, google.com.au, resear... 
11.0 \n", "218947 [twitter.com, youtube.com, google.com, linkedi... 11.0 \n", "261974 [wix.com, academia.edu, researchgate.net, rese... 12.0 \n", "... ... ... \n", "10405738 [elsevier.com, amci.org.co, springer.com, revi... 12.0 \n", "10472264 [about.me, github.com, gitlab.com, gravatar.co... 12.0 \n", "10785961 [researchgate.net, ssrn.com, cnpq.br, google.c... 17.0 \n", "10845645 [osf.io, osf.io, academia.edu, mendeley.com, f... 12.0 \n", "10896059 [cronicajalisco.com, youtube.com, tlaquepaque.... 22.0 \n", "\n", "[141 rows x 29 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)]" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
00000-0002-7843-8497111davibarbosapesquisador na área sociojurídica, professor, ...[professor davi barbosa delmont][[plataforma de cursos ideia criativa, https:/...NaN...00000NaNNaNNaN[eadplataforma.com, facebook.com, youtube.com,...39.0
10000-0003-1554-1531111katarzynaochmankatarzyna ochman [kataˈʐɨna ˈɔxman] is assista...[[kataˈʐɨna ˈɔxman], catharina ochman, cathari...[[researchgate, https://www.researchgate.net/p...NaN...10001NaNNaNNaN[researchgate.net, academia.edu, facebook.com,...11.0
20000-0003-3080-4643111grahamdawsonscience and engineering faculty (sef) libraria...[ graham colin dawson, g.c. dawson][[qut home page, https://www.library.qut.edu.a...g.dawson@qut.edu.au...00061qut.edu.auNaNNaN[qut.edu.au, qut.edu.au, google.com.au, resear...11.0
30000-0003-3193-030x111juan pablowolff mejiaaspirante a maestría en derecho y negocios int...[juan pablo wolff, pablo wolff mejia, juan p. ...[[twitter, https://twitter.com/pablomejiam], [...juanpmejia@ulasallista.edu.co...00001ulasallista.edu.coNaNNaN[twitter.com, youtube.com, google.com, linkedi...11.0
40000-0002-5341-6531111trenthammondmr trent hammond is an honorary research fello...[trent ernest hammond (t.e.hammond)][[academic support masters, http://trenthammon...trent.hammond@academicsupportmasters.com.au...10011academicsupportmasters.com.au[health.nsw.gov.au, csu.edu.au, sociologist.co...5.0[wix.com, academia.edu, researchgate.net, rese...12.0
..................................................................
1360000-0002-3374-5709111guillermoortizmédico, internista, neumólogo, intensivista, e...[guillermo ortiz-ruiz][[elsevier, https://www.elsevier.com/], [asoci...NaN...6200880NaNNaNNaN[elsevier.com, amci.org.co, springer.com, revi...12.0
1370000-0001-7228-5680111textprotocolNaNNaN[[about, https://about.me/textprotocol], [gith...NaN...00000NaNNaNNaN[about.me, github.com, gitlab.com, gravatar.co...12.0
1380000-0002-3064-0194111leonardo fernandocruz bassoNaNNaN[[papers-1, https://www.researchgate.net/profi...leonardofernando.basso@mackenzie.br...50001mackenzie.br[mackenzie.br]1.0[researchgate.net, ssrn.com, cnpq.br, google.c...17.0
1390000-0003-1047-4229111bayusaktibayu purbha saktisaya adalah bayu purbha sakti...[bayu purbha sakti][[osf, http://osf.io/qe2ug], [inarxiv, https:/...NaN...00001NaNNaNNaN[osf.io, osf.io, academia.edu, mendeley.com, f...12.0
1400000-0003-4836-7074111karla haydeeortiz palafoxkarla haydee ortíz palafoxmiembro del sistema ...[karla palafox][[opinión día del maestro, http://www.cronicaj...NaN...00021NaNNaNNaN[cronicajalisco.com, youtube.com, tlaquepaque....22.0
\n", "

141 rows × 29 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "0 0000-0002-7843-8497 1 1 1 \n", "1 0000-0003-1554-1531 1 1 1 \n", "2 0000-0003-3080-4643 1 1 1 \n", "3 0000-0003-3193-030x 1 1 1 \n", "4 0000-0002-5341-6531 1 1 1 \n", ".. ... ... ... ... \n", "136 0000-0002-3374-5709 1 1 1 \n", "137 0000-0001-7228-5680 1 1 1 \n", "138 0000-0002-3064-0194 1 1 1 \n", "139 0000-0003-1047-4229 1 1 1 \n", "140 0000-0003-4836-7074 1 1 1 \n", "\n", " given_names family_name \\\n", "0 davi barbosa \n", "1 katarzyna ochman \n", "2 graham dawson \n", "3 juan pablo wolff mejia \n", "4 trent hammond \n", ".. ... ... \n", "136 guillermo ortiz \n", "137 text protocol \n", "138 leonardo fernando cruz basso \n", "139 bayu sakti \n", "140 karla haydee ortiz palafox \n", "\n", " biography \\\n", "0 pesquisador na área sociojurídica, professor, ... \n", "1 katarzyna ochman [kataˈʐɨna ˈɔxman] is assista... \n", "2 science and engineering faculty (sef) libraria... \n", "3 aspirante a maestría en derecho y negocios int... \n", "4 mr trent hammond is an honorary research fello... \n", ".. ... \n", "136 médico, internista, neumólogo, intensivista, e... \n", "137 NaN \n", "138 NaN \n", "139 bayu purbha saktisaya adalah bayu purbha sakti... \n", "140 karla haydee ortíz palafoxmiembro del sistema ... \n", "\n", " other_names \\\n", "0 [professor davi barbosa delmont] \n", "1 [[kataˈʐɨna ˈɔxman], catharina ochman, cathari... \n", "2 [ graham colin dawson, g.c. dawson] \n", "3 [juan pablo wolff, pablo wolff mejia, juan p. ... \n", "4 [trent ernest hammond (t.e.hammond)] \n", ".. ... \n", "136 [guillermo ortiz-ruiz] \n", "137 NaN \n", "138 NaN \n", "139 [bayu purbha sakti] \n", "140 [karla palafox] \n", "\n", " urls \\\n", "0 [[plataforma de cursos ideia criativa, https:/... \n", "1 [[researchgate, https://www.researchgate.net/p... \n", "2 [[qut home page, https://www.library.qut.edu.a... \n", "3 [[twitter, https://twitter.com/pablomejiam], [... 
\n", "4 [[academic support masters, http://trenthammon... \n", ".. ... \n", "136 [[elsevier, https://www.elsevier.com/], [asoci... \n", "137 [[about, https://about.me/textprotocol], [gith... \n", "138 [[papers-1, https://www.researchgate.net/profi... \n", "139 [[osf, http://osf.io/qe2ug], [inarxiv, https:/... \n", "140 [[opinión día del maestro, http://www.cronicaj... \n", "\n", " primary_email ... n_doi n_arxiv n_pmc \\\n", "0 NaN ... 0 0 0 \n", "1 NaN ... 1 0 0 \n", "2 g.dawson@qut.edu.au ... 0 0 0 \n", "3 juanpmejia@ulasallista.edu.co ... 0 0 0 \n", "4 trent.hammond@academicsupportmasters.com.au ... 1 0 0 \n", ".. ... ... ... ... ... \n", "136 NaN ... 62 0 0 \n", "137 NaN ... 0 0 0 \n", "138 leonardofernando.basso@mackenzie.br ... 5 0 0 \n", "139 NaN ... 0 0 0 \n", "140 NaN ... 0 0 0 \n", "\n", " n_other_pids label primary_email_domain \\\n", "0 0 0 NaN \n", "1 0 1 NaN \n", "2 6 1 qut.edu.au \n", "3 0 1 ulasallista.edu.co \n", "4 1 1 academicsupportmasters.com.au \n", ".. ... ... ... \n", "136 88 0 NaN \n", "137 0 0 NaN \n", "138 0 1 mackenzie.br \n", "139 0 1 NaN \n", "140 2 1 NaN \n", "\n", " other_email_domains n_emails \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 [health.nsw.gov.au, csu.edu.au, sociologist.co... 5.0 \n", ".. ... ... \n", "136 NaN NaN \n", "137 NaN NaN \n", "138 [mackenzie.br] 1.0 \n", "139 NaN NaN \n", "140 NaN NaN \n", "\n", " url_domains n_urls \n", "0 [eadplataforma.com, facebook.com, youtube.com,... 39.0 \n", "1 [researchgate.net, academia.edu, facebook.com,... 11.0 \n", "2 [qut.edu.au, qut.edu.au, google.com.au, resear... 11.0 \n", "3 [twitter.com, youtube.com, google.com, linkedi... 11.0 \n", "4 [wix.com, academia.edu, researchgate.net, rese... 12.0 \n", ".. ... ... \n", "136 [elsevier.com, amci.org.co, springer.com, revi... 12.0 \n", "137 [about.me, github.com, gitlab.com, gravatar.co... 12.0 \n", "138 [researchgate.net, ssrn.com, cnpq.br, google.c... 
17.0 \n", "139 [osf.io, osf.io, academia.edu, mendeley.com, f... 12.0 \n", "140 [cronicajalisco.com, youtube.com, tlaquepaque.... 22.0 \n", "\n", "[141 rows x 29 columns]" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exploded_sources = df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)].explode('works_source').reset_index(drop=True)\n", "exploded_sources" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
00000-0002-7843-8497111davibarbosapesquisador na área sociojurídica, professor, ...[professor davi barbosa delmont][[plataforma de cursos ideia criativa, https:/...NaN...00000NaNNaNNaN[eadplataforma.com, facebook.com, youtube.com,...39.0
10000-0003-1554-1531111katarzynaochmankatarzyna ochman [kataˈʐɨna ˈɔxman] is assista...[[kataˈʐɨna ˈɔxman], catharina ochman, cathari...[[researchgate, https://www.researchgate.net/p...NaN...10001NaNNaNNaN[researchgate.net, academia.edu, facebook.com,...11.0
30000-0003-3193-030x111juan pablowolff mejiaaspirante a maestría en derecho y negocios int...[juan pablo wolff, pablo wolff mejia, juan p. ...[[twitter, https://twitter.com/pablomejiam], [...juanpmejia@ulasallista.edu.co...00001ulasallista.edu.coNaNNaN[twitter.com, youtube.com, google.com, linkedi...11.0
40000-0002-5341-6531111trenthammondmr trent hammond is an honorary research fello...[trent ernest hammond (t.e.hammond)][[academic support masters, http://trenthammon...trent.hammond@academicsupportmasters.com.au...10011academicsupportmasters.com.au[health.nsw.gov.au, csu.edu.au, sociologist.co...5.0[wix.com, academia.edu, researchgate.net, rese...12.0
50000-0001-5295-2271111antoniymoyseyNaNNaN[[academic journals database, http://journalda...antoniimoisei@bsmu.edu.ua...00001bsmu.edu.uaNaNNaN[journaldatabase.info, nplu.org, acls.org, ind...21.0
..................................................................
1350000-0002-8125-0081111issambencheikhNaN[issame1982, دكتور عصام بن الشيخ][[my blog web site, http://issame1982.blogspot...NaN...00001NaNNaNNaN[blogspot.com, researchgate.net, google.com, l...12.0
1360000-0002-3374-5709111guillermoortizmédico, internista, neumólogo, intensivista, e...[guillermo ortiz-ruiz][[elsevier, https://www.elsevier.com/], [asoci...NaN...6200880NaNNaNNaN[elsevier.com, amci.org.co, springer.com, revi...12.0
1370000-0001-7228-5680111textprotocolNaNNaN[[about, https://about.me/textprotocol], [gith...NaN...00000NaNNaNNaN[about.me, github.com, gitlab.com, gravatar.co...12.0
1390000-0003-1047-4229111bayusaktibayu purbha saktisaya adalah bayu purbha sakti...[bayu purbha sakti][[osf, http://osf.io/qe2ug], [inarxiv, https:/...NaN...00001NaNNaNNaN[osf.io, osf.io, academia.edu, mendeley.com, f...12.0
1400000-0003-4836-7074111karla haydeeortiz palafoxkarla haydee ortíz palafoxmiembro del sistema ...[karla palafox][[opinión día del maestro, http://www.cronicaj...NaN...00021NaNNaNNaN[cronicajalisco.com, youtube.com, tlaquepaque....22.0
\n", "

115 rows × 29 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "0 0000-0002-7843-8497 1 1 1 \n", "1 0000-0003-1554-1531 1 1 1 \n", "3 0000-0003-3193-030x 1 1 1 \n", "4 0000-0002-5341-6531 1 1 1 \n", "5 0000-0001-5295-2271 1 1 1 \n", ".. ... ... ... ... \n", "135 0000-0002-8125-0081 1 1 1 \n", "136 0000-0002-3374-5709 1 1 1 \n", "137 0000-0001-7228-5680 1 1 1 \n", "139 0000-0003-1047-4229 1 1 1 \n", "140 0000-0003-4836-7074 1 1 1 \n", "\n", " given_names family_name \\\n", "0 davi barbosa \n", "1 katarzyna ochman \n", "3 juan pablo wolff mejia \n", "4 trent hammond \n", "5 antoniy moysey \n", ".. ... ... \n", "135 issam bencheikh \n", "136 guillermo ortiz \n", "137 text protocol \n", "139 bayu sakti \n", "140 karla haydee ortiz palafox \n", "\n", " biography \\\n", "0 pesquisador na área sociojurídica, professor, ... \n", "1 katarzyna ochman [kataˈʐɨna ˈɔxman] is assista... \n", "3 aspirante a maestría en derecho y negocios int... \n", "4 mr trent hammond is an honorary research fello... \n", "5 NaN \n", ".. ... \n", "135 NaN \n", "136 médico, internista, neumólogo, intensivista, e... \n", "137 NaN \n", "139 bayu purbha saktisaya adalah bayu purbha sakti... \n", "140 karla haydee ortíz palafoxmiembro del sistema ... \n", "\n", " other_names \\\n", "0 [professor davi barbosa delmont] \n", "1 [[kataˈʐɨna ˈɔxman], catharina ochman, cathari... \n", "3 [juan pablo wolff, pablo wolff mejia, juan p. ... \n", "4 [trent ernest hammond (t.e.hammond)] \n", "5 NaN \n", ".. ... \n", "135 [issame1982, دكتور عصام بن الشيخ] \n", "136 [guillermo ortiz-ruiz] \n", "137 NaN \n", "139 [bayu purbha sakti] \n", "140 [karla palafox] \n", "\n", " urls \\\n", "0 [[plataforma de cursos ideia criativa, https:/... \n", "1 [[researchgate, https://www.researchgate.net/p... \n", "3 [[twitter, https://twitter.com/pablomejiam], [... \n", "4 [[academic support masters, http://trenthammon... \n", "5 [[academic journals database, http://journalda... \n", ".. ... 
\n", "135 [[my blog web site, http://issame1982.blogspot... \n", "136 [[elsevier, https://www.elsevier.com/], [asoci... \n", "137 [[about, https://about.me/textprotocol], [gith... \n", "139 [[osf, http://osf.io/qe2ug], [inarxiv, https:/... \n", "140 [[opinión día del maestro, http://www.cronicaj... \n", "\n", " primary_email ... n_doi n_arxiv n_pmc \\\n", "0 NaN ... 0 0 0 \n", "1 NaN ... 1 0 0 \n", "3 juanpmejia@ulasallista.edu.co ... 0 0 0 \n", "4 trent.hammond@academicsupportmasters.com.au ... 1 0 0 \n", "5 antoniimoisei@bsmu.edu.ua ... 0 0 0 \n", ".. ... ... ... ... ... \n", "135 NaN ... 0 0 0 \n", "136 NaN ... 62 0 0 \n", "137 NaN ... 0 0 0 \n", "139 NaN ... 0 0 0 \n", "140 NaN ... 0 0 0 \n", "\n", " n_other_pids label primary_email_domain \\\n", "0 0 0 NaN \n", "1 0 1 NaN \n", "3 0 1 ulasallista.edu.co \n", "4 1 1 academicsupportmasters.com.au \n", "5 0 1 bsmu.edu.ua \n", ".. ... ... ... \n", "135 0 1 NaN \n", "136 88 0 NaN \n", "137 0 0 NaN \n", "139 0 1 NaN \n", "140 2 1 NaN \n", "\n", " other_email_domains n_emails \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "3 NaN NaN \n", "4 [health.nsw.gov.au, csu.edu.au, sociologist.co... 5.0 \n", "5 NaN NaN \n", ".. ... ... \n", "135 NaN NaN \n", "136 NaN NaN \n", "137 NaN NaN \n", "139 NaN NaN \n", "140 NaN NaN \n", "\n", " url_domains n_urls \n", "0 [eadplataforma.com, facebook.com, youtube.com,... 39.0 \n", "1 [researchgate.net, academia.edu, facebook.com,... 11.0 \n", "3 [twitter.com, youtube.com, google.com, linkedi... 11.0 \n", "4 [wix.com, academia.edu, researchgate.net, rese... 12.0 \n", "5 [journaldatabase.info, nplu.org, acls.org, ind... 21.0 \n", ".. ... ... \n", "135 [blogspot.com, researchgate.net, google.com, l... 12.0 \n", "136 [elsevier.com, amci.org.co, springer.com, revi... 12.0 \n", "137 [about.me, github.com, gitlab.com, gravatar.co... 12.0 \n", "139 [osf.io, osf.io, academia.edu, mendeley.com, f... 12.0 \n", "140 [cronicajalisco.com, youtube.com, tlaquepaque.... 
22.0 \n", "\n", "[115 rows x 29 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exploded_sources[exploded_sources.apply(lambda x: x['works_source'].find(x['given_names']) >= 0, axis=1)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Works source" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "TODO: paste this section's analysis from Miriam." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## External IDs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "External IDs should come from reliable sources. ORCID registrants cannot add them freely." ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "df['n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1.301959e+06\n", "mean 1.358640e+00\n", "std 6.635087e-01\n", "min 1.000000e+00\n", "25% 1.000000e+00\n", "50% 1.000000e+00\n", "75% 2.000000e+00\n", "max 8.000000e+01\n", "Name: n_ids, dtype: float64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.n_ids.describe()" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...n_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsn_emailsurl_domainsn_urlsn_ids
72533300000-0002-9554-6633111john awilliamsNaNNaN[[aston university profile page, https://resea...NaN...002081NaNNaNNaN[aston.ac.uk]1.080.0
\n", "

1 rows × 30 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "7253330 0000-0002-9554-6633 1 1 1 \n", "\n", " given_names family_name biography other_names \\\n", "7253330 john a williams NaN NaN \n", "\n", " urls primary_email ... \\\n", "7253330 [[aston university profile page, https://resea... NaN ... \n", "\n", " n_arxiv n_pmc n_other_pids label primary_email_domain \\\n", "7253330 0 0 208 1 NaN \n", "\n", " other_email_domains n_emails url_domains n_urls n_ids \n", "7253330 NaN NaN [aston.ac.uk] 1.0 80.0 \n", "\n", "[1 rows x 30 columns]" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.n_ids == df.n_ids.max()]" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "ids = df[['orcid', 'external_ids']].explode('external_ids').reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "ids['provider'] = ids[ids.external_ids.notna()]['external_ids'].apply(lambda x: x[0])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidexternal_idsprovider
70000-0001-7463-977x[loop profile, 371409]loop profile
90000-0001-8718-0056[scopus author id, 55466912100]scopus author id
100000-0001-8718-0056[scopus author id, 7102015452]scopus author id
140000-0001-9708-5570[researcherid, p-5112-2015]researcherid
150000-0001-9708-5570[scopus author id, 42062216900]scopus author id
\n", "
" ], "text/plain": [ " orcid external_ids provider\n", "7 0000-0001-7463-977x [loop profile, 371409] loop profile\n", "9 0000-0001-8718-0056 [scopus author id, 55466912100] scopus author id\n", "10 0000-0001-8718-0056 [scopus author id, 7102015452] scopus author id\n", "14 0000-0001-9708-5570 [researcherid, p-5112-2015] researcherid\n", "15 0000-0001-9708-5570 [scopus author id, 42062216900] scopus author id" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ids[ids.provider.notna()].head()" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "scopus author id", "researcherid", "loop profile", "ciência id", "researcher name resolver id", "中国科学家在线", "sciprofile", "isni", "gnd", "pitt id", "technical university of denmark cwis", "researcher id", "id dialnet", "digital author id", "scopus author id: ", "authenticusid", "hku researcherpage", "uow scholars", "cti vitae", "scopus author id:", "hkust profile", "chalmers id", "scopus id", "iauthor", "google scholar", "digital author id (dai)", "authid", "dai", "us epa vivo", "scopus id", "authenticus", "smithsonian profiles", "github", "escientist", "vivo cornell", "researcherid:", "id dialnet:", "dialnet id", "sciprofiles", "kaken", "une researcher id", "researcherid: ", "orcid", "scienceopen", "profile system identifier", "orcid id", "custom" ], "y": [ 1030807, 544825, 117325, 36666, 7907, 4804, 4411, 3075, 2954, 2674, 2483, 1445, 1168, 1124, 1077, 869, 741, 646, 581, 548, 522, 430, 254, 212, 200, 177, 175, 155, 146, 127, 83, 61, 51, 49, 46, 39, 7, 6, 5, 5, 4, 3, 2, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": 
"bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": 
[ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": 
"scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { 
"color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "IDs provided by providers" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = [\n", " go.Bar(\n", " x=ids.groupby('provider').count().sort_values('orcid', ascending=False).index,\n", " y=ids.groupby('provider').count().sort_values('orcid', ascending=False)['orcid']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='IDs provided by providers',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([nan, 'loop profile', 'scopus author id', 'researcherid',\n", " 'scopus author id: ', 'gnd', 'isni', 'ciência id', 'pitt id',\n", " 'id dialnet', 'technical university of denmark cwis',\n", " 'researcher name resolver id', 'scopus author id:',\n", " 'hkust profile', '中国科学家在线', 'cti vitae', 'escientist',\n", " 'researcher id', 'sciprofile', 'digital author id', 'scopus id',\n", " 'uow scholars', 'authenticusid', 'authenticus', 'authid',\n", " 'hku researcherpage', 'chalmers id', 'iauthor', 'us epa vivo',\n", " 'digital author id (dai)', 'vivo cornell', 'smithsonian profiles',\n", " 'github', 'google scholar', 'scopus id', 'researcherid:', 'dai',\n", " 'kaken', 'orcid id', 'dialnet id', 'profile system identifier',\n", " 'sciprofiles', 'id dialnet:', 'researcherid: ', 'scienceopen',\n", " 'une researcher id', 'custom', 'orcid'], dtype=object)" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.unique(ids['provider'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Keywords" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This field is problematic as users can be nasty and put multiple keywords in one as opposed of having different keywords. 
Look at this:" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['data science ',\n", " 'science of science',\n", " 'scholarly knowledge mining',\n", " 'open science',\n", " 'research infrastructures']" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['orcid'] == AM]['keywords'].values[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I did a good job: one keyword per entry. The following one, instead, is dirty: all the keywords are packed into a single comma-separated entry" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['open access, open science, libraries, repositories, social web,']" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['orcid'] == PP]['keywords'].values[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "So the keyword field needs some cleaning: split every entry on commas, strip whitespace, and drop empty and duplicate tokens" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "def fix_keywords(lst):\n", " fixed = set()\n", " for k in lst:\n", " tokens = set(k.split(','))\n", "# tokens.remove('')\n", " for t in tokens:\n", " fixed.add(str.strip(t))\n", " fixed.discard('')\n", " return list(fixed)" ] }, { "cell_type": "code", "execution_count": 60, "metadata": {}, "outputs": [], "source": [ "df['fixed_keywords'] = df[df.keywords.notna()]['keywords'].apply(lambda x: fix_keywords(x))" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['open science', 'repositories', 'open access', 'libraries', 'social web']" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['orcid'] == PP]['fixed_keywords'].values[0]" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "df['n_keywords'] = df.keywords.str.len()" ] }, { "cell_type": "code", "execution_count": 63, "metadata": {}, "outputs": [ { "data": {
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidn_keywords
28510810000-0002-0673-0341154.0
73441510000-0002-7060-4112141.0
22354400000-0002-6075-3501140.0
29942330000-0002-4071-0301118.0
39713230000-0002-9638-8091115.0
.........
109165690000-0001-5692-7639NaN
109165700000-0003-1539-0999NaN
109165710000-0003-2858-5509NaN
109165720000-0003-2438-9500NaN
109165730000-0003-4119-4772NaN
\n", "

10916574 rows × 2 columns

\n", "
" ], "text/plain": [ " orcid n_keywords\n", "2851081 0000-0002-0673-0341 154.0\n", "7344151 0000-0002-7060-4112 141.0\n", "2235440 0000-0002-6075-3501 140.0\n", "2994233 0000-0002-4071-0301 118.0\n", "3971323 0000-0002-9638-8091 115.0\n", "... ... ...\n", "10916569 0000-0001-5692-7639 NaN\n", "10916570 0000-0003-1539-0999 NaN\n", "10916571 0000-0003-2858-5509 NaN\n", "10916572 0000-0003-2438-9500 NaN\n", "10916573 0000-0003-4119-4772 NaN\n", "\n", "[10916574 rows x 2 columns]" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "0000-0002-0673-0341", "0000-0002-7060-4112", "0000-0002-6075-3501", "0000-0002-4071-0301", "0000-0002-9638-8091", "0000-0002-4235-4259", "0000-0001-9462-5666", "0000-0003-0076-6287", "0000-0002-1878-9762", "0000-0001-6537-7683", "0000-0001-6307-6027", "0000-0003-2273-9888", "0000-0003-1799-0971", "0000-0001-5287-1949", "0000-0002-0937-7061", "0000-0001-9715-9357", "0000-0001-5696-1052", "0000-0003-2998-5520", "0000-0001-5869-2204", "0000-0002-0156-3580", "0000-0002-9625-6742", "0000-0002-8401-8018", "0000-0001-9985-1697", "0000-0003-4246-8579", "0000-0002-7710-0355", "0000-0002-8083-7382", "0000-0001-7654-5013", "0000-0001-6939-3859", "0000-0002-3061-3364", "0000-0003-2509-2549", "0000-0002-0463-0048", "0000-0001-5230-715x", "0000-0001-9336-6850", "0000-0001-5458-7167", "0000-0003-0209-180x", "0000-0003-3584-6834", "0000-0002-8227-5387", "0000-0002-9381-2264", "0000-0003-3340-6413", "0000-0002-2935-1934", "0000-0002-8644-8396", "0000-0002-3123-3021", "0000-0002-8659-6321", "0000-0002-8449-2211", "0000-0001-5167-7466", "0000-0001-5637-1124", "0000-0003-2532-2906", 
"0000-0003-4673-1063", "0000-0003-4608-3844", "0000-0002-3532-043x", "0000-0002-6347-9464", "0000-0003-4505-3678", "0000-0002-2683-4527", "0000-0003-4374-6374", "0000-0003-4511-7942", "0000-0002-1103-9651", "0000-0001-9280-6017", "0000-0003-3720-1183", "0000-0001-9586-0780", "0000-0002-5306-7781", "0000-0003-2218-1343", "0000-0002-8499-1045", "0000-0003-1863-0265", "0000-0002-5539-1761", "0000-0003-2550-1859", "0000-0002-8072-1152", "0000-0003-3342-6123", "0000-0001-6861-9561", "0000-0002-2252-672x", "0000-0002-3597-3350", "0000-0002-3907-3552", "0000-0001-8689-185x", "0000-0002-5274-7742", "0000-0002-3186-8860", "0000-0001-6843-9325", "0000-0001-7133-7848", "0000-0003-4486-2684", "0000-0003-3343-5660", "0000-0002-9014-2090", "0000-0002-6282-0640", "0000-0001-7857-4133", "0000-0002-1294-2156", "0000-0002-4432-3448", "0000-0003-0097-4182", "0000-0003-1245-7705", "0000-0001-8445-412x", "0000-0003-4153-6779", "0000-0002-9125-6022", "0000-0002-4598-2891", "0000-0003-3387-3193", "0000-0002-3866-6460", "0000-0002-1411-3028", "0000-0003-4283-2895", "0000-0002-0211-7195", "0000-0002-3898-9542", "0000-0002-1545-7818", "0000-0002-4963-9345", "0000-0002-1770-9660", "0000-0002-1960-5857", "0000-0003-2054-477x" ], "y": [ 154, 141, 140, 118, 115, 104, 98, 94, 92, 91, 88, 86, 84, 82, 78, 77, 76, 75, 74, 73, 71, 70, 69, 66, 64, 62, 61, 60, 58, 57, 56, 54, 53, 53, 52, 51, 51, 51, 51, 50, 50, 50, 50, 49, 49, 49, 48, 48, 48, 48, 48, 48, 48, 47, 47, 46, 46, 46, 45, 44, 44, 44, 44, 44, 44, 43, 43, 42, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 40, 40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 38, 38, 38, 38 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", 
"gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 
0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, 
"hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Keywords provided by ORCiD" }, "xaxis": { "range": [ -0.5, 99.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "set_top_n(100)\n", "data = [\n", " go.Bar(\n", " x=df.sort_values('n_keywords', ascending=False)['orcid'][:TOP_N],\n", " y=df.sort_values('n_keywords', ascending=False)['n_keywords'][:TOP_N]\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Keywords provided by ORCiD',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "grouped_keywords = df[['orcid', 'keywords']]\\\n", " .explode('keywords')\\\n", " .reset_index(drop=True)\\\n", " .groupby('keywords')\\\n", " .count()\\\n", " .sort_values('orcid', ascending=False)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "machine learning", "bioinformatics", "molecular biology", "artificial intelligence", "education", "epidemiology", "neuroscience", "public health", "cancer", "immunology", "microbiology", "ecology", "remote sensing", "genetics", "climate change", "deep learning", "genomics", "biochemistry", "data science", "computer vision", "psychology", "sustainability", "biotechnology", "nanotechnology", "robotics", "data mining", "statistics", "image processing", "gis", "nutrition", "chemistry", "optimization", "computer science", "marketing", "biomaterials", "nanomaterials", "renewable energy", "organic chemistry", "electrochemistry", "educação", "diabetes", "analytical chemistry", "innovation", "materials science", "mass spectrometry", "architecture", "evolution", "epigenetics", "physics", "biomechanics" ], "y": [ 5090, 3299, 2377, 2322, 2218, 2121, 2010, 1973, 1956, 1829, 1805, 1788, 1688, 1648, 1621, 1485, 1451, 1398, 1363, 
1339, 1331, 1313, 1298, 1294, 1149, 1135, 1134, 1104, 1075, 1071, 1064, 1042, 1037, 1030, 1013, 1010, 1005, 977, 977, 959, 958, 953, 937, 917, 910, 895, 892, 884, 883, 882 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 
0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { 
"outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], 
[ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": 
"white", "zerolinewidth": 2 } } }, "title": { "text": "Top-50 keywords occurrence" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "set_top_n(50)\n",
 "# grouped_keywords is sorted by descending count, so its first TOP_N rows are\n",
 "# the most frequent keywords; the index holds the keyword, 'orcid' the count.\n",
 "top_keyword_counts = grouped_keywords['orcid'].iloc[:TOP_N]\n",
 "data = [go.Bar(x=top_keyword_counts.index, y=top_keyword_counts)]\n",
 "\n",
 "layout = go.Layout(\n",
 "    title='Top-%s keywords occurrence' % TOP_N,\n",
 "    xaxis={'tickangle': 45, 'tickfont': {'size': 12}}\n",
 ")\n",
 "fig = go.Figure(data=data, layout=layout)\n",
 "plotly.offline.iplot(fig)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Correlation" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "coloraxis": "coloraxis", "hovertemplate": "x: %{x}
y: %{y}
color: %{z}", "name": "0", "type": "heatmap", "x": [ "claimed", "verified_email", "verified_primary_email", "n_works", "n_doi", "n_arxiv", "n_pmc", "n_other_pids", "label", "n_emails", "n_urls", "n_ids", "n_keywords" ], "xaxis": "x", "y": [ "claimed", "verified_email", "verified_primary_email", "n_works", "n_doi", "n_arxiv", "n_pmc", "n_other_pids", "label", "n_emails", "n_urls", "n_ids", "n_keywords" ], "yaxis": "y", "z": [ [ null, null, null, null, null, null, null, null, null, null, null, null, null ], [ null, 1, 0.976162114192081, 0.0643465684083431, 0.06256365939443789, 0.004813567718545522, 0.022837768079663843, 0.0502787806136592, 0.1653735105575277, 0.011077499054971796, 0.01616663985261792, 0.0878909499747942, 0.01702128763665974 ], [ null, 0.976162114192081, 1, 0.06550520343326285, 0.06358869411827689, 0.004936396077751568, 0.0233694641294265, 0.05107779193878439, 0.1673817151653653, 0.009088965040421332, 0.016748133920736267, 0.0891344788642304, 0.01757470891436621 ], [ null, 0.0643465684083431, 0.06550520343326285, 1, 0.9422006818527181, 0.2510908375242561, 0.3452995689789822, 0.8507066573191994, 0.2193240629073337, 0.04012080330827889, 0.050551529905608905, 0.24172815699206937, 0.030991434187229255 ], [ null, 0.06256365939443789, 0.06358869411827689, 0.9422006818527181, 1, 0.2859256869913497, 0.35237093272562453, 0.8207407571730607, 0.20528211794680093, 0.03066074418509789, 0.02249574354972604, 0.22699396525236162, 0.029924481047131947 ], [ null, 0.004813567718545522, 0.004936396077751568, 0.2510908375242561, 0.2859256869913497, 1, -0.0016834274457045173, 0.18858664921353688, 0.013625663523574786, 0.001327784437475615, -0.0029341825430676785, 0.00551756083696426, -0.0010622024587917253 ], [ null, 0.022837768079663843, 0.0233694641294265, 0.3452995689789822, 0.35237093272562453, -0.0016834274457045173, 1, 0.2813216452145862, 0.06643990962625554, 0.0034751570518647148, 0.00017861447927955607, 0.06890564721203783, 0.02385706062724447 ], [ null, 
0.0502787806136592, 0.05107779193878439, 0.8507066573191994, 0.8207407571730607, 0.18858664921353688, 0.2813216452145862, 1, 0.16718354633724933, 0.020109885973996034, 0.008514311952439128, 0.23592717383229878, 0.023831895109099027 ], [ null, 0.1653735105575277, 0.1673817151653653, 0.2193240629073337, 0.20528211794680093, 0.013625663523574786, 0.06643990962625554, 0.16718354633724933, 1, 0.018556742838561485, 0.017316375665650897, 0.2141058845072524, 0.05566397359882779 ], [ null, 0.011077499054971796, 0.009088965040421332, 0.04012080330827889, 0.03066074418509789, 0.001327784437475615, 0.0034751570518647148, 0.020109885973996034, 0.018556742838561485, 1, 0.09408460253059668, 0.0452261998698129, 0.048598477595472214 ], [ null, 0.01616663985261792, 0.016748133920736267, 0.050551529905608905, 0.02249574354972604, -0.0029341825430676785, 0.00017861447927955607, 0.008514311952439128, 0.017316375665650897, 0.09408460253059668, 1, 0.06946298201611972, 0.15905312700498755 ], [ null, 0.0878909499747942, 0.0891344788642304, 0.24172815699206937, 0.22699396525236162, 0.00551756083696426, 0.06890564721203783, 0.23592717383229878, 0.2141058845072524, 0.0452261998698129, 0.06946298201611972, 1, 0.06380885961518437 ], [ null, 0.01702128763665974, 0.01757470891436621, 0.030991434187229255, 0.029924481047131947, -0.0010622024587917253, 0.02385706062724447, 0.023831895109099027, 0.05566397359882779, 0.048598477595472214, 0.15905312700498755, 0.06380885961518437, 1 ] ] } ], "layout": { "coloraxis": { "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { 
"color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": 
"" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { 
"outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", 
"#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "xaxis": { "anchor": "y", "constrain": "domain", "domain": [ 0, 1 ], "scaleanchor": "y" }, "yaxis": { "anchor": "x", "autorange": "reversed", "constrain": "domain", "domain": [ 0, 1 ] } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "# Pairwise correlation of the numeric/boolean features, restricted to the\n",
 "# profiles with n_ids > 0.\n",
 "# The dtypes are selected explicitly because DataFrame.corr() only dropped\n",
 "# non-numeric columns silently in older pandas; since pandas 2.0 numeric_only\n",
 "# defaults to False, so non-numeric columns such as `orcid` would make it fail.\n",
 "# NOTE(review): `claimed` is all-NaN in the stored output, presumably because\n",
 "# it is constant within this subset -- confirm.\n",
 "numeric_features = df[df.n_ids > 0].select_dtypes(include=['number', 'bool'])\n",
 "fig = px.imshow(numeric_features.corr())\n",
 "fig.show()"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }