From 629d7816455ce46303752dff24e86c5420c896cf Mon Sep 17 00:00:00 2001 From: Andrea Mannocci Date: Thu, 25 Mar 2021 15:20:06 +0100 Subject: [PATCH] moved lots of preprocessing under make --- notebooks/01-Exploration.ipynb | 15940 ++++++++++++++++++++++++++++++- src/data/make_dataset.py | 49 +- 2 files changed, 15549 insertions(+), 440 deletions(-) diff --git a/notebooks/01-Exploration.ipynb b/notebooks/01-Exploration.ipynb index 66cbe21..d94c9b1 100644 --- a/notebooks/01-Exploration.ipynb +++ b/notebooks/01-Exploration.ipynb @@ -54,7 +54,9 @@ "import pandas as pd\n", "import ast\n", "import tldextract\n", - "import numpy\n", + "import numpy as np\n", + "\n", + "import antispam\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", @@ -64,10 +66,13 @@ "init_notebook_mode(connected=True)\n", "TOP_N = 0\n", "TOP_RANGE = [0, 0]\n", + "\n", "def set_top_n(n):\n", " global TOP_N, TOP_RANGE\n", " TOP_N = n\n", - " TOP_RANGE = [-.5, n - 1 + .5]" + " TOP_RANGE = [-.5, n - 1 + .5]\n", + " \n", + "pd.set_option('display.max_columns', None)" ] }, { @@ -162,16 +167,16 @@ " \n", " \n", " orcid\n", - " claimed\n", " verified_email\n", " verified_primary_email\n", " given_names\n", " family_name\n", " biography\n", " other_names\n", - " urls\n", " primary_email\n", - " ...\n", + " keywords\n", + " external_ids\n", + " education\n", " employment\n", " n_works\n", " works_source\n", @@ -182,185 +187,257 @@ " n_pmc\n", " n_other_pids\n", " label\n", + " primary_email_domain\n", + " other_email_domains\n", + " url_domains\n", + " n_emails\n", + " n_urls\n", + " n_ids\n", + " n_keywords\n", + " n_education\n", + " n_employment\n", " \n", " \n", " \n", " \n", - " 10000000\n", - " 0000-0001-9812-9790\n", + " 0000-0001-6097-3953\n", " 1\n", - " 1\n", - " 1\n", - " jonathan\n", - " termaat\n", + " 0\n", + " 0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " ...\n", - " [[research co-ordinator, waikato district heal...\n", " 0\n", " NaN\n", - " 2019-04-15t03:08:05.268z\n", - " 2019-04-15t03:09:44.443z\n", + " 2018-03-02t09:29:16.528z\n", + " 2018-03-02t09:43:07.551z\n", " 0\n", " 0\n", " 0\n", " 0\n", " 0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 10000001\n", - " 0000-0002-0572-0598\n", + " 0000-0001-6112-5550\n", " 1\n", " 1\n", " 1\n", - " jonathan\n", - " jørgensen\n", + " NaN\n", + " NaN\n", + " NaN\n", + " [v.i. yurtaev; v. yurtaev]\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " ...\n", - " NaN\n", + " [[professor, peoples friendship university of ...\n", " 0\n", " NaN\n", - " 2019-03-17t20:31:23.753z\n", - " 2019-03-17t20:33:50.316z\n", + " 2018-04-03t07:50:23.358z\n", + " 2020-03-18t09:42:44.753z\n", " 0\n", " 0\n", " 0\n", " 0\n", " 0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 1.0\n", " \n", " \n", - " 10000002\n", - " 0000-0002-1512-9646\n", + " 0000-0001-6152-2695\n", " 1\n", " 1\n", " 1\n", - " jonathan\n", - " mkrtchyan\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " ...\n", " NaN\n", - " 1\n", - " [jonathan mkrtchyan]\n", - " 2020-08-24t18:47:27.332z\n", - " 2020-08-24t18:54:37.398z\n", - " 1\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 0\n", + " NaN\n", + " 2019-12-11t15:31:56.388z\n", + " 2020-01-28t15:34:17.309z\n", " 0\n", " 0\n", - " 2\n", - " 1\n", + " 0\n", + " 0\n", + " 0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", - " 10000003\n", - " 0000-0002-2271-4069\n", + " 0000-0001-6220-5683\n", " 1\n", " 1\n", " 1\n", - " jonathan\n", - " pickard\n", " NaN\n", " NaN\n", " NaN\n", " NaN\n", - " ...\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " [[research scientist, new york university abu ...\n", " 0\n", " NaN\n", - " 2018-05-03t09:34:25.613z\n", - " 2018-05-10t13:05:09.297z\n", + " 2015-08-18t12:36:45.307z\n", + " 2020-09-23t13:37:54.180z\n", " 0\n", " 0\n", " 0\n", " 0\n", " 0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 1.0\n", " \n", " \n", - " 10000004\n", - " 0000-0002-3054-9622\n", + " 0000-0001-7071-8294\n", " 1\n", " 1\n", " 1\n", - " jonathan\n", - " greer\n", - " NaN\n", - " [jonathan s. greer]\n", " NaN\n", " NaN\n", - " ...\n", - " [[associate professor of old testament and dir...\n", - " 2\n", - " [multidisciplinary digital publishing institut...\n", - " 2019-04-09t20:05:25.447z\n", - " 2020-02-07t15:55:18.951z\n", - " 2\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " [[researcher (academic), universidad de zarago...\n", + " 0\n", + " NaN\n", + " 2014-03-10t13:22:01.966z\n", + " 2016-06-14t22:17:54.470z\n", " 0\n", " 0\n", - " 1\n", - " 1\n", + " 0\n", + " 0\n", + " 0\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " 2.0\n", " \n", " \n", "\n", - "

5 rows × 24 columns

\n", "" ], "text/plain": [ - " orcid claimed verified_email \\\n", - "10000000 0000-0001-9812-9790 1 1 \n", - "10000001 0000-0002-0572-0598 1 1 \n", - "10000002 0000-0002-1512-9646 1 1 \n", - "10000003 0000-0002-2271-4069 1 1 \n", - "10000004 0000-0002-3054-9622 1 1 \n", + " orcid verified_email verified_primary_email \\\n", + "0000-0001-6097-3953 1 0 0 \n", + "0000-0001-6112-5550 1 1 1 \n", + "0000-0001-6152-2695 1 1 1 \n", + "0000-0001-6220-5683 1 1 1 \n", + "0000-0001-7071-8294 1 1 1 \n", "\n", - " verified_primary_email given_names family_name biography \\\n", - "10000000 1 jonathan termaat NaN \n", - "10000001 1 jonathan jørgensen NaN \n", - "10000002 1 jonathan mkrtchyan NaN \n", - "10000003 1 jonathan pickard NaN \n", - "10000004 1 jonathan greer NaN \n", + " given_names family_name biography \\\n", + "0000-0001-6097-3953 NaN NaN NaN \n", + "0000-0001-6112-5550 NaN NaN NaN \n", + "0000-0001-6152-2695 NaN NaN NaN \n", + "0000-0001-6220-5683 NaN NaN NaN \n", + "0000-0001-7071-8294 NaN NaN NaN \n", "\n", - " other_names urls primary_email ... \\\n", - "10000000 NaN NaN NaN ... \n", - "10000001 NaN NaN NaN ... \n", - "10000002 NaN NaN NaN ... \n", - "10000003 NaN NaN NaN ... \n", - "10000004 [jonathan s. greer] NaN NaN ... \n", + " other_names primary_email keywords \\\n", + "0000-0001-6097-3953 NaN NaN NaN \n", + "0000-0001-6112-5550 [v.i. yurtaev; v. yurtaev] NaN NaN \n", + "0000-0001-6152-2695 NaN NaN NaN \n", + "0000-0001-6220-5683 NaN NaN NaN \n", + "0000-0001-7071-8294 NaN NaN NaN \n", "\n", - " employment n_works \\\n", - "10000000 [[research co-ordinator, waikato district heal... 0 \n", - "10000001 NaN 0 \n", - "10000002 NaN 1 \n", - "10000003 NaN 0 \n", - "10000004 [[associate professor of old testament and dir... 2 \n", + " external_ids education \\\n", + "0000-0001-6097-3953 NaN NaN \n", + "0000-0001-6112-5550 NaN NaN \n", + "0000-0001-6152-2695 NaN NaN \n", + "0000-0001-6220-5683 NaN NaN \n", + "0000-0001-7071-8294 NaN NaN \n", "\n", - " works_source \\\n", - "10000000 NaN \n", - "10000001 NaN \n", - "10000002 [jonathan mkrtchyan] \n", - "10000003 NaN \n", - "10000004 [multidisciplinary digital publishing institut... \n", + " employment \\\n", + "0000-0001-6097-3953 NaN \n", + "0000-0001-6112-5550 [[professor, peoples friendship university of ... \n", + "0000-0001-6152-2695 NaN \n", + "0000-0001-6220-5683 [[research scientist, new york university abu ... \n", + "0000-0001-7071-8294 [[researcher (academic), universidad de zarago... \n", "\n", - " activation_date last_update_date n_doi n_arxiv \\\n", - "10000000 2019-04-15t03:08:05.268z 2019-04-15t03:09:44.443z 0 0 \n", - "10000001 2019-03-17t20:31:23.753z 2019-03-17t20:33:50.316z 0 0 \n", - "10000002 2020-08-24t18:47:27.332z 2020-08-24t18:54:37.398z 1 0 \n", - "10000003 2018-05-03t09:34:25.613z 2018-05-10t13:05:09.297z 0 0 \n", - "10000004 2019-04-09t20:05:25.447z 2020-02-07t15:55:18.951z 2 0 \n", + " n_works works_source activation_date \\\n", + "0000-0001-6097-3953 0 NaN 2018-03-02t09:29:16.528z \n", + "0000-0001-6112-5550 0 NaN 2018-04-03t07:50:23.358z \n", + "0000-0001-6152-2695 0 NaN 2019-12-11t15:31:56.388z \n", + "0000-0001-6220-5683 0 NaN 2015-08-18t12:36:45.307z \n", + "0000-0001-7071-8294 0 NaN 2014-03-10t13:22:01.966z \n", "\n", - " n_pmc n_other_pids label \n", - "10000000 0 0 0 \n", - "10000001 0 0 0 \n", - "10000002 0 2 1 \n", - "10000003 0 0 0 \n", - "10000004 0 1 1 \n", + " last_update_date n_doi n_arxiv n_pmc \\\n", + "0000-0001-6097-3953 2018-03-02t09:43:07.551z 0 0 0 \n", + "0000-0001-6112-5550 2020-03-18t09:42:44.753z 0 0 0 \n", + "0000-0001-6152-2695 2020-01-28t15:34:17.309z 0 0 0 \n", + "0000-0001-6220-5683 2020-09-23t13:37:54.180z 0 0 0 \n", + "0000-0001-7071-8294 2016-06-14t22:17:54.470z 0 0 0 \n", "\n", - "[5 rows x 24 columns]" + " n_other_pids label primary_email_domain \\\n", + "0000-0001-6097-3953 0 0 NaN \n", + "0000-0001-6112-5550 0 0 NaN \n", + "0000-0001-6152-2695 0 0 NaN \n", + "0000-0001-6220-5683 0 0 NaN \n", + "0000-0001-7071-8294 0 0 NaN \n", + "\n", + " other_email_domains url_domains n_emails n_urls n_ids \\\n", + "0000-0001-6097-3953 NaN NaN NaN NaN NaN \n", + "0000-0001-6112-5550 NaN NaN NaN NaN NaN \n", + "0000-0001-6152-2695 NaN NaN NaN NaN NaN \n", + "0000-0001-6220-5683 NaN NaN NaN NaN NaN \n", + "0000-0001-7071-8294 NaN NaN NaN NaN NaN \n", + "\n", + " n_keywords n_education n_employment \n", + "0000-0001-6097-3953 NaN NaN NaN \n", + "0000-0001-6112-5550 NaN NaN 1.0 \n", + "0000-0001-6152-2695 NaN NaN NaN \n", + "0000-0001-6220-5683 NaN NaN 1.0 \n", + "0000-0001-7071-8294 NaN NaN 2.0 " ] }, "execution_count": 5, @@ -370,7 +447,8 @@ ], "source": [ "parts = glob.glob('../data/processed/dataset.pkl.*')\n", - "df = pd.concat((pd.read_pickle(part) for part in parts))\n", + "\n", + "df = pd.concat((pd.read_pickle(part) for part in sorted(parts)))\n", "df.head(5)" ] }, @@ -408,16 +486,16 @@ " \n", " \n", " orcid\n", - " claimed\n", " verified_email\n", " verified_primary_email\n", " given_names\n", " family_name\n", " biography\n", " other_names\n", - " urls\n", " primary_email\n", - " ...\n", + " keywords\n", + " external_ids\n", + " education\n", " employment\n", " n_works\n", " works_source\n", @@ -428,67 +506,26 @@ " n_pmc\n", " n_other_pids\n", " label\n", + " primary_email_domain\n", + " other_email_domains\n", + " url_domains\n", + " n_emails\n", + " n_urls\n", + " n_ids\n", + " n_keywords\n", + " n_education\n", + " n_employment\n", " \n", " \n", " \n", - " \n", - " 1575869\n", - " 0000-0002-5193-7851\n", - " 1\n", - " 1\n", - " 1\n", - " andrea\n", - " mannocci\n", - " data scientist & researcher; scholarly knowled...\n", - " NaN\n", - " [[personal website, https://andremann.github.i...\n", - " andrea.mannocci@isti.cnr.it\n", - " ...\n", - " [[research associate, istituto di scienza e te...\n", - " 37\n", - " [scopus - elsevier, crossref metadata search, ...\n", - " 2017-09-12t14:28:33.467z\n", - " 2021-03-09t08:32:47.840z\n", - " 34\n", - " 0\n", - " 0\n", - " 60\n", - " 1\n", - " \n", " \n", "\n", - "

1 rows × 24 columns

\n", "" ], "text/plain": [ - " orcid claimed verified_email verified_primary_email \\\n", - "1575869 0000-0002-5193-7851 1 1 1 \n", - "\n", - " given_names family_name \\\n", - "1575869 andrea mannocci \n", - "\n", - " biography other_names \\\n", - "1575869 data scientist & researcher; scholarly knowled... NaN \n", - "\n", - " urls \\\n", - "1575869 [[personal website, https://andremann.github.i... \n", - "\n", - " primary_email ... \\\n", - "1575869 andrea.mannocci@isti.cnr.it ... \n", - "\n", - " employment n_works \\\n", - "1575869 [[research associate, istituto di scienza e te... 37 \n", - "\n", - " works_source \\\n", - "1575869 [scopus - elsevier, crossref metadata search, ... \n", - "\n", - " activation_date last_update_date n_doi n_arxiv \\\n", - "1575869 2017-09-12t14:28:33.467z 2021-03-09t08:32:47.840z 34 0 \n", - "\n", - " n_pmc n_other_pids label \n", - "1575869 0 60 1 \n", - "\n", - "[1 rows x 24 columns]" + "Empty DataFrame\n", + "Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment]\n", + "Index: []" ] }, "execution_count": 6, @@ -527,16 +564,16 @@ " \n", " \n", " orcid\n", - " claimed\n", " verified_email\n", " verified_primary_email\n", " given_names\n", " family_name\n", " biography\n", " other_names\n", - " urls\n", " primary_email\n", - " ...\n", + " keywords\n", + " external_ids\n", + " education\n", " employment\n", " n_works\n", " works_source\n", @@ -547,55 +584,26 @@ " n_pmc\n", " n_other_pids\n", " label\n", + " primary_email_domain\n", + " other_email_domains\n", + " url_domains\n", + " n_emails\n", + " n_urls\n", + " n_ids\n", + " n_keywords\n", + " n_education\n", + " n_employment\n", " \n", " \n", " \n", - " \n", - " 6819986\n", - " 0000-0001-6997-9470\n", - " 1\n", - " 1\n", - " 1\n", - " other\n", - " whatsapp\n", - " NaN\n", - " NaN\n", - " [[otherwhatsapp, https://otherwhatsapp.com/], ...\n", - " NaN\n", - " ...\n", - " NaN\n", - " 0\n", - " NaN\n", - " 2020-10-07t10:37:12.237z\n", - " 2020-10-08t02:32:03.935z\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " 0\n", - " \n", " \n", "\n", - "

1 rows × 24 columns

\n", "" ], "text/plain": [ - " orcid claimed verified_email verified_primary_email \\\n", - "6819986 0000-0001-6997-9470 1 1 1 \n", - "\n", - " given_names family_name biography other_names \\\n", - "6819986 other whatsapp NaN NaN \n", - "\n", - " urls primary_email ... \\\n", - "6819986 [[otherwhatsapp, https://otherwhatsapp.com/], ... NaN ... \n", - "\n", - " employment n_works works_source activation_date \\\n", - "6819986 NaN 0 NaN 2020-10-07t10:37:12.237z \n", - "\n", - " last_update_date n_doi n_arxiv n_pmc n_other_pids label \n", - "6819986 2020-10-08t02:32:03.935z 0 0 0 0 0 \n", - "\n", - "[1 rows x 24 columns]" + "Empty DataFrame\n", + "Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment]\n", + "Index: []" ] }, "execution_count": 7, @@ -609,18 +617,80 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 8, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "orcid 10916574\n", + "verified_email 10916574\n", + "verified_primary_email 10916574\n", + "given_names 10886150\n", + "family_name 10601571\n", + "biography 348649\n", + "other_names 551482\n", + "primary_email 123851\n", + "keywords 646400\n", + "external_ids 1301959\n", + "education 2430233\n", + "employment 2665092\n", + "n_works 10916574\n", + "works_source 2721431\n", + "activation_date 10916574\n", + "last_update_date 10916574\n", + "n_doi 10916574\n", + "n_arxiv 10916574\n", + "n_pmc 10916574\n", + "n_other_pids 10916574\n", + "label 10916574\n", + "primary_email_domain 123851\n", + "other_email_domains 48306\n", + "url_domains 707687\n", + "n_emails 48306\n", + "n_urls 707687\n", + "n_ids 1301959\n", + "n_keywords 646400\n", + "n_education 2430233\n", + "n_employment 2665092\n", + "dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.count() #10916574" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 10916574.0\n", + "mean 1.0\n", + "std 0.0\n", + "min 1.0\n", + "25% 1.0\n", + "50% 1.0\n", + "75% 1.0\n", + "max 1.0\n", + "Name: orcid, dtype: float64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['orcid'].describe()" ] @@ -634,9 +704,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 123851\n", + "unique 123848\n", + "top maykin@owasp.org\n", + "freq 2\n", + "Name: primary_email, dtype: object" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['primary_email'].describe()" ] @@ -650,63 +735,693 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0000-0003-0033-0918 opercin@erbakan.edu.tr\n", + "0000-0002-8774-0030 patrick.davey@monash.edu\n", + "0000-0001-9855-1676 maykin@owasp.org\n", + "Name: primary_email, dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['primary_email'].dropna().loc[df['primary_email'].duplicated()]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-0836-2271111maykinwarasartNaNNaNmaykin@owasp.orgNaNNaNNaNNaN0NaN2020-09-15t04:43:55.709z2020-09-15t05:17:28.509z00000owasp.org[dga.or.th]NaN1.0NaNNaNNaNNaNNaN
0000-0001-9855-1676111maykinwarasartNaNNaNmaykin@owasp.orgNaNNaNNaNNaN0NaN2020-10-23t17:51:51.925z2021-01-01t15:00:52.053z00000owasp.org[dga.or.th, ieee.org]NaN2.0NaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-0836-2271 1 1 1 \n", + "0000-0001-9855-1676 1 1 1 \n", + "\n", + " given_names family_name biography other_names \\\n", + "0000-0002-0836-2271 maykin warasart NaN NaN \n", + "0000-0001-9855-1676 maykin warasart NaN NaN \n", + "\n", + " primary_email keywords external_ids education \\\n", + "0000-0002-0836-2271 maykin@owasp.org NaN NaN NaN \n", + "0000-0001-9855-1676 maykin@owasp.org NaN NaN NaN \n", + "\n", + " employment n_works works_source \\\n", + "0000-0002-0836-2271 NaN 0 NaN \n", + "0000-0001-9855-1676 NaN 0 NaN \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0002-0836-2271 2020-09-15t04:43:55.709z 2020-09-15t05:17:28.509z \n", + "0000-0001-9855-1676 2020-10-23t17:51:51.925z 2021-01-01t15:00:52.053z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0002-0836-2271 0 0 0 0 0 \n", + "0000-0001-9855-1676 0 0 0 0 0 \n", + "\n", + " primary_email_domain other_email_domains url_domains \\\n", + "0000-0002-0836-2271 owasp.org [dga.or.th] NaN \n", + "0000-0001-9855-1676 owasp.org [dga.or.th, ieee.org] NaN \n", + "\n", + " n_emails n_urls n_ids n_keywords n_education \\\n", + "0000-0002-0836-2271 1.0 NaN NaN NaN NaN \n", + "0000-0001-9855-1676 2.0 NaN NaN NaN NaN \n", + "\n", + " n_employment \n", + "0000-0002-0836-2271 NaN \n", + "0000-0001-9855-1676 NaN " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df['primary_email'] == 'maykin@owasp.org']" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-2232-9638111osmanperçinNaNNaNopercin@erbakan.edu.trNaNNaNNaNNaN0NaN2015-01-12t13:47:55.549z2020-01-27t07:38:24.269z00000erbakan.edu.trNaNNaNNaNNaNNaNNaNNaNNaN
0000-0003-0033-0918111osmanperçinNaNNaNopercin@erbakan.edu.trNaNNaNNaN[[, necmettin erbakan university, konya, , tr,...0NaN2015-10-13t05:47:12.014z2020-12-25t13:52:03.976z00000erbakan.edu.trNaNNaNNaNNaNNaNNaNNaN1.0
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-2232-9638 1 1 1 \n", + "0000-0003-0033-0918 1 1 1 \n", + "\n", + " given_names family_name biography other_names \\\n", + "0000-0002-2232-9638 osman perçin NaN NaN \n", + "0000-0003-0033-0918 osman perçin NaN NaN \n", + "\n", + " primary_email keywords external_ids education \\\n", + "0000-0002-2232-9638 opercin@erbakan.edu.tr NaN NaN NaN \n", + "0000-0003-0033-0918 opercin@erbakan.edu.tr NaN NaN NaN \n", + "\n", + " employment \\\n", + "0000-0002-2232-9638 NaN \n", + "0000-0003-0033-0918 [[, necmettin erbakan university, konya, , tr,... \n", + "\n", + " n_works works_source activation_date \\\n", + "0000-0002-2232-9638 0 NaN 2015-01-12t13:47:55.549z \n", + "0000-0003-0033-0918 0 NaN 2015-10-13t05:47:12.014z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc \\\n", + "0000-0002-2232-9638 2020-01-27t07:38:24.269z 0 0 0 \n", + "0000-0003-0033-0918 2020-12-25t13:52:03.976z 0 0 0 \n", + "\n", + " n_other_pids label primary_email_domain \\\n", + "0000-0002-2232-9638 0 0 erbakan.edu.tr \n", + "0000-0003-0033-0918 0 0 erbakan.edu.tr \n", + "\n", + " other_email_domains url_domains n_emails n_urls n_ids \\\n", + "0000-0002-2232-9638 NaN NaN NaN NaN NaN \n", + "0000-0003-0033-0918 NaN NaN NaN NaN NaN \n", + "\n", + " n_keywords n_education n_employment \n", + "0000-0002-2232-9638 NaN NaN NaN \n", + "0000-0003-0033-0918 NaN NaN 1.0 " + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df['primary_email'] == 'opercin@erbakan.edu.tr']" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-9158-1757111patrickdaveyNaNNaNpatrick.davey@monash.edu[radiochemistry, bioinorganic chemistry, inorg...NaNNaN[[phd student, monash university, melbourne, ,...0NaN2019-05-09t23:01:02.170z2019-08-20t03:00:17.844z00000monash.eduNaNNaNNaNNaNNaN4.0NaN1.0
0000-0002-8774-0030111patrickdaveyNaNNaNpatrick.davey@monash.eduNaNNaNNaN[[phd student, monash university, melbourne, v...1[crossref]2018-09-11t10:47:10.997z2021-02-09t06:21:44.138z10000monash.eduNaNNaNNaNNaNNaNNaNNaN1.0
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-9158-1757 1 1 1 \n", + "0000-0002-8774-0030 1 1 1 \n", + "\n", + " given_names family_name biography other_names \\\n", + "0000-0002-9158-1757 patrick davey NaN NaN \n", + "0000-0002-8774-0030 patrick davey NaN NaN \n", + "\n", + " primary_email \\\n", + "0000-0002-9158-1757 patrick.davey@monash.edu \n", + "0000-0002-8774-0030 patrick.davey@monash.edu \n", + "\n", + " keywords \\\n", + "0000-0002-9158-1757 [radiochemistry, bioinorganic chemistry, inorg... \n", + "0000-0002-8774-0030 NaN \n", + "\n", + " external_ids education \\\n", + "0000-0002-9158-1757 NaN NaN \n", + "0000-0002-8774-0030 NaN NaN \n", + "\n", + " employment \\\n", + "0000-0002-9158-1757 [[phd student, monash university, melbourne, ,... \n", + "0000-0002-8774-0030 [[phd student, monash university, melbourne, v... \n", + "\n", + " n_works works_source activation_date \\\n", + "0000-0002-9158-1757 0 NaN 2019-05-09t23:01:02.170z \n", + "0000-0002-8774-0030 1 [crossref] 2018-09-11t10:47:10.997z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc \\\n", + "0000-0002-9158-1757 2019-08-20t03:00:17.844z 0 0 0 \n", + "0000-0002-8774-0030 2021-02-09t06:21:44.138z 1 0 0 \n", + "\n", + " n_other_pids label primary_email_domain \\\n", + "0000-0002-9158-1757 0 0 monash.edu \n", + "0000-0002-8774-0030 0 0 monash.edu \n", + "\n", + " other_email_domains url_domains n_emails n_urls n_ids \\\n", + "0000-0002-9158-1757 NaN NaN NaN NaN NaN \n", + "0000-0002-8774-0030 NaN NaN NaN NaN NaN \n", + "\n", + " n_keywords n_education n_employment \n", + "0000-0002-9158-1757 4.0 NaN 1.0 \n", + "0000-0002-8774-0030 NaN NaN 1.0 " + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df['primary_email'] == 'patrick.davey@monash.edu']" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], - "source": [ - "df['primary_email_domain'] = df[df.primary_email.notna()]['primary_email'].apply(lambda x: x.split('@')[1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 123851\n", + "unique 17089\n", + "top gmail.com\n", + "freq 26540\n", + "Name: primary_email_domain, dtype: object" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df['primary_email_domain'].describe()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcid
primary_email_domain
gmail.com26540
hotmail.com3769
yahoo.com2614
163.com2109
yuhs.ac1132
......
imean-biotech.com1
imec.msu.ru1
imedea.uib-csic.es1
imes.uni-hannover.de1
zzuli.edu.cn1
\n", + "

17089 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " orcid\n", + "primary_email_domain \n", + "gmail.com 26540\n", + "hotmail.com 3769\n", + "yahoo.com 2614\n", + "163.com 2109\n", + "yuhs.ac 1132\n", + "... ...\n", + "imean-biotech.com 1\n", + "imec.msu.ru 1\n", + "imedea.uib-csic.es 1\n", + "imes.uni-hannover.de 1\n", + "zzuli.edu.cn 1\n", + "\n", + "[17089 rows x 1 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "top_primary_emails = df[['primary_email_domain', 'orcid']]\\\n", " .groupby('primary_email_domain')\\\n", @@ -717,9 +1432,940 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "gmail.com", + "hotmail.com", + "yahoo.com", + "163.com", + "yuhs.ac", + "qq.com", + "outlook.com", + "126.com", + "bu.edu", + "usgs.gov", + "mail.ru", + "yahoo.com.br", + "usp.br", + "ua.pt", + "umich.edu", + "ust.hk", + "foxmail.com", + "uomustansiriyah.edu.iq", + "yandex.ru", + "uq.edu.au", + "ukr.net", + "unesp.br", + "ucl.ac.uk", + "ieee.org", + "naver.com", + "st-annes.ox.ac.uk", + "stcatz.ox.ac.uk", + "yahoo.fr", + "ucm.es", + "live.com" + ], + "y": [ + 26540, + 3769, + 2614, + 2109, + 1132, + 1056, + 940, + 762, + 630, + 584, + 575, + 458, + 457, + 300, + 290, + 277, + 258, + 247, + 242, + 235, + 225, + 218, + 207, + 204, + 187, + 184, + 184, + 172, + 171, + 163 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top-30 email domains" + }, + "xaxis": { + "range": [ + -0.5, + 29.5 + ], + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(30)\n", "data = [\n", @@ -746,49 +2392,353 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def extract_email_domains(lst):\n", - " res = []\n", - " for email in lst:\n", - " res.append(email.split('@')[1])\n", - " return res" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df['other_email_domains'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: extract_email_domains(x))" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-5916-446X111antonio gilvanteixeira júniorNaN[teixeira, antônio gilvan, júnior, antonio gil...gilvan.junior@aluno.ufca.edu.br[ethicis; medicine; infectology; neurology; ne...[[scopus author id, 56647743200], [scopus auth...[[faculty of health and life sciences, , unive...NaN14[antonio gilvan teixeira júnior, scopus - else...2016-05-18t11:26:36.642z2016-09-20t18:25:05.728z130080aluno.ufca.edu.br[liverpool.ac.uk][researchgate.net, academia.edu, cnpq.br]1.03.04.01.01.0NaN
0000-0002-8742-947X111aarontan shing loongNaNNaNaaron.tanshingloong@wadh.ox.ac.ukNaNNaN[[ruskin school of art; wadham college, , univ...NaN0NaN2015-10-05t23:10:08.771z2016-06-14t19:55:50.313z00000wadh.ox.ac.uk[rsa.ox.ac.uk]NaN1.0NaNNaNNaN1.0NaN
0000-0001-9097-2281111abhisheksolankiNaNNaNNaNNaNNaNNaN[[senior engineer, robert bosch (india), benga...1[abhishek solanki]2019-04-22t04:43:06.232z2020-07-02t14:18:28.305z00000NaN[in.bosch.com][github.com, linkedin.com]1.02.0NaNNaNNaN2.0
0000-0002-8614-3007111adamarraNaNNaNNaNNaNNaNNaNNaN0NaN2017-11-15t06:33:45.625z2017-11-15t06:44:02.998z00000NaN[hct.ac.ae]NaN1.0NaNNaNNaNNaNNaN
0000-0001-9884-5498111albertoronzaniNaNNaNalberto@aronza.comNaNNaNNaN[[research scientist, vtt technical research c...19[crossref metadata search, alberto ronzani, cr...2014-04-16t13:21:54.287z2020-09-28t15:10:37.439z180030aronza.com[vtt.fi]NaN1.0NaNNaNNaNNaN1.0
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-5916-446X 1 1 1 \n", + "0000-0002-8742-947X 1 1 1 \n", + "0000-0001-9097-2281 1 1 1 \n", + "0000-0002-8614-3007 1 1 1 \n", + "0000-0001-9884-5498 1 1 1 \n", + "\n", + " given_names family_name biography \\\n", + "0000-0002-5916-446X antonio gilvan teixeira júnior NaN \n", + "0000-0002-8742-947X aaron tan shing loong NaN \n", + "0000-0001-9097-2281 abhishek solanki NaN \n", + "0000-0002-8614-3007 adam arra NaN \n", + "0000-0001-9884-5498 alberto ronzani NaN \n", + "\n", + " other_names \\\n", + "0000-0002-5916-446X [teixeira, antônio gilvan, júnior, antonio gil... \n", + "0000-0002-8742-947X NaN \n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 NaN \n", + "\n", + " primary_email \\\n", + "0000-0002-5916-446X gilvan.junior@aluno.ufca.edu.br \n", + "0000-0002-8742-947X aaron.tanshingloong@wadh.ox.ac.uk \n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 alberto@aronza.com \n", + "\n", + " keywords \\\n", + "0000-0002-5916-446X [ethicis; medicine; infectology; neurology; ne... \n", + "0000-0002-8742-947X NaN \n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 NaN \n", + "\n", + " external_ids \\\n", + "0000-0002-5916-446X [[scopus author id, 56647743200], [scopus auth... \n", + "0000-0002-8742-947X NaN \n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 NaN \n", + "\n", + " education \\\n", + "0000-0002-5916-446X [[faculty of health and life sciences, , unive... \n", + "0000-0002-8742-947X [[ruskin school of art; wadham college, , univ... \n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 NaN \n", + "\n", + " employment \\\n", + "0000-0002-5916-446X NaN \n", + "0000-0002-8742-947X NaN \n", + "0000-0001-9097-2281 [[senior engineer, robert bosch (india), benga... \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 [[research scientist, vtt technical research c... \n", + "\n", + " n_works \\\n", + "0000-0002-5916-446X 14 \n", + "0000-0002-8742-947X 0 \n", + "0000-0001-9097-2281 1 \n", + "0000-0002-8614-3007 0 \n", + "0000-0001-9884-5498 19 \n", + "\n", + " works_source \\\n", + "0000-0002-5916-446X [antonio gilvan teixeira júnior, scopus - else... \n", + "0000-0002-8742-947X NaN \n", + "0000-0001-9097-2281 [abhishek solanki] \n", + "0000-0002-8614-3007 NaN \n", + "0000-0001-9884-5498 [crossref metadata search, alberto ronzani, cr... \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0002-5916-446X 2016-05-18t11:26:36.642z 2016-09-20t18:25:05.728z \n", + "0000-0002-8742-947X 2015-10-05t23:10:08.771z 2016-06-14t19:55:50.313z \n", + "0000-0001-9097-2281 2019-04-22t04:43:06.232z 2020-07-02t14:18:28.305z \n", + "0000-0002-8614-3007 2017-11-15t06:33:45.625z 2017-11-15t06:44:02.998z \n", + "0000-0001-9884-5498 2014-04-16t13:21:54.287z 2020-09-28t15:10:37.439z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0002-5916-446X 13 0 0 8 0 \n", + "0000-0002-8742-947X 0 0 0 0 0 \n", + "0000-0001-9097-2281 0 0 0 0 0 \n", + "0000-0002-8614-3007 0 0 0 0 0 \n", + "0000-0001-9884-5498 18 0 0 3 0 \n", + "\n", + " primary_email_domain other_email_domains \\\n", + "0000-0002-5916-446X aluno.ufca.edu.br [liverpool.ac.uk] \n", + "0000-0002-8742-947X wadh.ox.ac.uk [rsa.ox.ac.uk] \n", + "0000-0001-9097-2281 NaN [in.bosch.com] \n", + "0000-0002-8614-3007 NaN [hct.ac.ae] \n", + "0000-0001-9884-5498 aronza.com [vtt.fi] \n", + "\n", + " url_domains n_emails \\\n", + "0000-0002-5916-446X [researchgate.net, academia.edu, cnpq.br] 1.0 \n", + "0000-0002-8742-947X NaN 1.0 \n", + "0000-0001-9097-2281 [github.com, linkedin.com] 1.0 \n", + "0000-0002-8614-3007 NaN 1.0 \n", + "0000-0001-9884-5498 NaN 1.0 \n", + "\n", + " n_urls n_ids n_keywords n_education n_employment \n", + "0000-0002-5916-446X 3.0 4.0 1.0 1.0 NaN \n", + "0000-0002-8742-947X NaN NaN NaN 1.0 NaN \n", + "0000-0001-9097-2281 2.0 NaN NaN NaN 2.0 \n", + "0000-0002-8614-3007 NaN NaN NaN NaN NaN \n", + "0000-0001-9884-5498 NaN NaN NaN NaN 1.0 " + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df[df['other_email_domains'].notna()].head()" + "df[df.other_email_domains.notna()].head()" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df['n_emails'] = df['other_emails'].str.len()" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -797,14 +2747,945 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 64, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29 + ], + "y": [ + 12, + 9, + 7, + 7, + 6, + 6, + 6, + 6, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 5, + 4, + 4 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 30 ORCID iDs by email" + }, + "xaxis": { + "range": [ + -0.5, + 29.5 + ], + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(30)\n", "data = [\n", " go.Bar(\n", - " x=emails_by_orcid[:TOP_N]['orcid'],\n", + " x=emails_by_orcid[:TOP_N].index,\n", " y=emails_by_orcid[:TOP_N]['n_emails']\n", " )\n", "]\n", @@ -819,7 +3700,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -833,9 +3714,940 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "gmail.com", + "hotmail.com", + "yahoo.com", + "qq.com", + "163.com", + "outlook.com", + "126.com", + "usp.br", + "ieee.org", + "mail.ru", + "yahoo.com.br", + "unesp.br", + "sbs.ox.ac.uk", + "yuhs.ac", + "naver.com", + "icloud.com", + "foxmail.com", + "uq.edu.au", + "ua.pt", + "cam.ac.uk", + "imperial.ac.uk", + "ukr.net", + "law.ox.ac.uk", + "mit.edu", + "stanford.edu", + "monash.edu", + "ucl.ac.uk", + "education.ox.ac.uk", + "ucm.es", + "conted.ox.ac.uk" + ], + "y": [ + 11116, + 1541, + 1295, + 779, + 774, + 425, + 260, + 236, + 224, + 149, + 147, + 141, + 136, + 133, + 130, + 118, + 96, + 94, + 89, + 84, + 77, + 76, + 75, + 74, + 71, + 70, + 68, + 67, + 66, + 64 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 30 other email domains" + }, + "xaxis": { + "range": [ + -0.5, + 29.5 + ], + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(30)\n", "data = [\n", @@ -853,6 +4665,13 @@ "plotly.offline.iplot(fig)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This somehow makes sense, legitimate users could put the gmail account as primary for login purposes and have institutional addresses as other email addresses. It makes also the life easier upon relocation." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -862,13 +4681,626 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0001-9097-2281111abhisheksolankiNaNNaNNaNNaNNaNNaN[[senior engineer, robert bosch (india), benga...1[abhishek solanki]2019-04-22t04:43:06.232z2020-07-02t14:18:28.305z00000NaN[in.bosch.com][github.com, linkedin.com]1.02.0NaNNaNNaN2.0
0000-0002-8614-3007111adamarraNaNNaNNaNNaNNaNNaNNaN0NaN2017-11-15t06:33:45.625z2017-11-15t06:44:02.998z00000NaN[hct.ac.ae]NaN1.0NaNNaNNaNNaNNaN
0000-0003-3728-6439111alejandraecheverry velásquezalejandra echeverry is an industrial electrici...NaNNaN[energy., renewable, science, control, innovat...NaN[[, electrical engineer, institución universit...[[professor, institución universitaria pascual...1[crossref]2019-03-31t00:00:42.929z2020-09-06t02:18:54.290z10000NaN[pascualbravo.edu.co]NaN1.0NaNNaN7.01.01.0
0000-0001-8330-7443111andreatesonieroNaNNaNNaNNaN[[researcherid, d-9056-2015]][[department of geophysics, master of science ...[[postdoctoral associate, yale university, new...4[andrea tesoniero]2015-03-09t11:59:06.093z2020-08-20t15:03:23.447z40020NaN[yale.edu]NaN1.0NaN1.0NaN4.02.0
0000-0001-9670-515X111esma esinyildirimNaNNaNNaN[chemical engineering, pharmacognosy, natural ...NaN[[business management, master of science, ista...NaN0NaN2020-07-26t10:38:03.721z2020-07-26t10:52:26.539z00000NaN[gmail.com]NaN1.0NaNNaN3.03.0NaN
.............................................................................................
0000-0003-1204-6009111nathanwalkNaNNaNNaNNaNNaN[[department of physics, doctor of philosophy,...[[, university of oxford, oxford, oxfordshire,...10[crossref metadata search]2016-07-28t14:24:16.844z2020-10-13t11:47:50.621z100000NaN[cs.ox.ac.uk][fu-berlin.de]1.01.0NaNNaN3.02.0
0000-0002-3472-7668111rafvandeveldeNaNNaNNaNNaNNaN[[chemical engineering technology, master, kat...[[phd researcher, katholieke universiteit leuv...0NaN2020-10-14t13:56:44.779z2020-10-16t14:21:40.673z00000NaN[kuleuven.be][linkedin.com]1.01.0NaNNaN2.01.0
0000-0002-9602-0529111carlos augustofinelliNaNNaNNaNNaNNaNNaNNaN1[crossref]2013-09-16t16:52:06.120z2020-12-01t22:47:08.074z10000NaN[cecot.com.br]NaN1.0NaNNaNNaNNaNNaN
0000-0003-4402-5982111filipede almeida araújoNaNNaNNaNNaNNaN[[materials science, msc. materials science, m...[[co-owner, aeft acessory, manaus, amazonas, b...0NaN2020-03-02t20:11:01.699z2020-12-04t13:53:39.404z00000NaN[ime.eb.br]NaN1.0NaNNaNNaN2.01.0
0000-0002-1734-7241111manareldeenahmedNaNNaNNaN[atomistic simulation, ai chips, thin films, d...NaNNaN[[post-doctor, zhejiang university, hangzhou, ...6[manareldeen ahmed]2017-02-17t13:18:36.540z2020-12-04t02:04:36.668z60030NaN[hotmail.com]NaN1.0NaNNaN5.0NaN1.0
\n", + "

19692 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0001-9097-2281 1 1 1 \n", + "0000-0002-8614-3007 1 1 1 \n", + "0000-0003-3728-6439 1 1 1 \n", + "0000-0001-8330-7443 1 1 1 \n", + "0000-0001-9670-515X 1 1 1 \n", + "... ... ... ... \n", + "0000-0003-1204-6009 1 1 1 \n", + "0000-0002-3472-7668 1 1 1 \n", + "0000-0002-9602-0529 1 1 1 \n", + "0000-0003-4402-5982 1 1 1 \n", + "0000-0002-1734-7241 1 1 1 \n", + "\n", + " given_names family_name \\\n", + "0000-0001-9097-2281 abhishek solanki \n", + "0000-0002-8614-3007 adam arra \n", + "0000-0003-3728-6439 alejandra echeverry velásquez \n", + "0000-0001-8330-7443 andrea tesoniero \n", + "0000-0001-9670-515X esma esin yildirim \n", + "... ... ... \n", + "0000-0003-1204-6009 nathan walk \n", + "0000-0002-3472-7668 raf vandevelde \n", + "0000-0002-9602-0529 carlos augusto finelli \n", + "0000-0003-4402-5982 filipe de almeida araújo \n", + "0000-0002-1734-7241 manareldeen ahmed \n", + "\n", + " biography \\\n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0003-3728-6439 alejandra echeverry is an industrial electrici... \n", + "0000-0001-8330-7443 NaN \n", + "0000-0001-9670-515X NaN \n", + "... ... \n", + "0000-0003-1204-6009 NaN \n", + "0000-0002-3472-7668 NaN \n", + "0000-0002-9602-0529 NaN \n", + "0000-0003-4402-5982 NaN \n", + "0000-0002-1734-7241 NaN \n", + "\n", + " other_names primary_email \\\n", + "0000-0001-9097-2281 NaN NaN \n", + "0000-0002-8614-3007 NaN NaN \n", + "0000-0003-3728-6439 NaN NaN \n", + "0000-0001-8330-7443 NaN NaN \n", + "0000-0001-9670-515X NaN NaN \n", + "... ... ... \n", + "0000-0003-1204-6009 NaN NaN \n", + "0000-0002-3472-7668 NaN NaN \n", + "0000-0002-9602-0529 NaN NaN \n", + "0000-0003-4402-5982 NaN NaN \n", + "0000-0002-1734-7241 NaN NaN \n", + "\n", + " keywords \\\n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0003-3728-6439 [energy., renewable, science, control, innovat... \n", + "0000-0001-8330-7443 NaN \n", + "0000-0001-9670-515X [chemical engineering, pharmacognosy, natural ... \n", + "... ... \n", + "0000-0003-1204-6009 NaN \n", + "0000-0002-3472-7668 NaN \n", + "0000-0002-9602-0529 NaN \n", + "0000-0003-4402-5982 NaN \n", + "0000-0002-1734-7241 [atomistic simulation, ai chips, thin films, d... \n", + "\n", + " external_ids \\\n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0003-3728-6439 NaN \n", + "0000-0001-8330-7443 [[researcherid, d-9056-2015]] \n", + "0000-0001-9670-515X NaN \n", + "... ... \n", + "0000-0003-1204-6009 NaN \n", + "0000-0002-3472-7668 NaN \n", + "0000-0002-9602-0529 NaN \n", + "0000-0003-4402-5982 NaN \n", + "0000-0002-1734-7241 NaN \n", + "\n", + " education \\\n", + "0000-0001-9097-2281 NaN \n", + "0000-0002-8614-3007 NaN \n", + "0000-0003-3728-6439 [[, electrical engineer, institución universit... \n", + "0000-0001-8330-7443 [[department of geophysics, master of science ... \n", + "0000-0001-9670-515X [[business management, master of science, ista... \n", + "... ... \n", + "0000-0003-1204-6009 [[department of physics, doctor of philosophy,... \n", + "0000-0002-3472-7668 [[chemical engineering technology, master, kat... \n", + "0000-0002-9602-0529 NaN \n", + "0000-0003-4402-5982 [[materials science, msc. materials science, m... \n", + "0000-0002-1734-7241 NaN \n", + "\n", + " employment \\\n", + "0000-0001-9097-2281 [[senior engineer, robert bosch (india), benga... \n", + "0000-0002-8614-3007 NaN \n", + "0000-0003-3728-6439 [[professor, institución universitaria pascual... \n", + "0000-0001-8330-7443 [[postdoctoral associate, yale university, new... \n", + "0000-0001-9670-515X NaN \n", + "... ... \n", + "0000-0003-1204-6009 [[, university of oxford, oxford, oxfordshire,... \n", + "0000-0002-3472-7668 [[phd researcher, katholieke universiteit leuv... \n", + "0000-0002-9602-0529 NaN \n", + "0000-0003-4402-5982 [[co-owner, aeft acessory, manaus, amazonas, b... \n", + "0000-0002-1734-7241 [[post-doctor, zhejiang university, hangzhou, ... \n", + "\n", + " n_works works_source \\\n", + "0000-0001-9097-2281 1 [abhishek solanki] \n", + "0000-0002-8614-3007 0 NaN \n", + "0000-0003-3728-6439 1 [crossref] \n", + "0000-0001-8330-7443 4 [andrea tesoniero] \n", + "0000-0001-9670-515X 0 NaN \n", + "... ... ... \n", + "0000-0003-1204-6009 10 [crossref metadata search] \n", + "0000-0002-3472-7668 0 NaN \n", + "0000-0002-9602-0529 1 [crossref] \n", + "0000-0003-4402-5982 0 NaN \n", + "0000-0002-1734-7241 6 [manareldeen ahmed] \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0001-9097-2281 2019-04-22t04:43:06.232z 2020-07-02t14:18:28.305z \n", + "0000-0002-8614-3007 2017-11-15t06:33:45.625z 2017-11-15t06:44:02.998z \n", + "0000-0003-3728-6439 2019-03-31t00:00:42.929z 2020-09-06t02:18:54.290z \n", + "0000-0001-8330-7443 2015-03-09t11:59:06.093z 2020-08-20t15:03:23.447z \n", + "0000-0001-9670-515X 2020-07-26t10:38:03.721z 2020-07-26t10:52:26.539z \n", + "... ... ... \n", + "0000-0003-1204-6009 2016-07-28t14:24:16.844z 2020-10-13t11:47:50.621z \n", + "0000-0002-3472-7668 2020-10-14t13:56:44.779z 2020-10-16t14:21:40.673z \n", + "0000-0002-9602-0529 2013-09-16t16:52:06.120z 2020-12-01t22:47:08.074z \n", + "0000-0003-4402-5982 2020-03-02t20:11:01.699z 2020-12-04t13:53:39.404z \n", + "0000-0002-1734-7241 2017-02-17t13:18:36.540z 2020-12-04t02:04:36.668z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0001-9097-2281 0 0 0 0 0 \n", + "0000-0002-8614-3007 0 0 0 0 0 \n", + "0000-0003-3728-6439 1 0 0 0 0 \n", + "0000-0001-8330-7443 4 0 0 2 0 \n", + "0000-0001-9670-515X 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "0000-0003-1204-6009 10 0 0 0 0 \n", + "0000-0002-3472-7668 0 0 0 0 0 \n", + "0000-0002-9602-0529 1 0 0 0 0 \n", + "0000-0003-4402-5982 0 0 0 0 0 \n", + "0000-0002-1734-7241 6 0 0 3 0 \n", + "\n", + " primary_email_domain other_email_domains \\\n", + "0000-0001-9097-2281 NaN [in.bosch.com] \n", + "0000-0002-8614-3007 NaN [hct.ac.ae] \n", + "0000-0003-3728-6439 NaN [pascualbravo.edu.co] \n", + "0000-0001-8330-7443 NaN [yale.edu] \n", + "0000-0001-9670-515X NaN [gmail.com] \n", + "... ... ... \n", + "0000-0003-1204-6009 NaN [cs.ox.ac.uk] \n", + "0000-0002-3472-7668 NaN [kuleuven.be] \n", + "0000-0002-9602-0529 NaN [cecot.com.br] \n", + "0000-0003-4402-5982 NaN [ime.eb.br] \n", + "0000-0002-1734-7241 NaN [hotmail.com] \n", + "\n", + " url_domains n_emails n_urls n_ids \\\n", + "0000-0001-9097-2281 [github.com, linkedin.com] 1.0 2.0 NaN \n", + "0000-0002-8614-3007 NaN 1.0 NaN NaN \n", + "0000-0003-3728-6439 NaN 1.0 NaN NaN \n", + "0000-0001-8330-7443 NaN 1.0 NaN 1.0 \n", + "0000-0001-9670-515X NaN 1.0 NaN NaN \n", + "... ... ... ... ... \n", + "0000-0003-1204-6009 [fu-berlin.de] 1.0 1.0 NaN \n", + "0000-0002-3472-7668 [linkedin.com] 1.0 1.0 NaN \n", + "0000-0002-9602-0529 NaN 1.0 NaN NaN \n", + "0000-0003-4402-5982 NaN 1.0 NaN NaN \n", + "0000-0002-1734-7241 NaN 1.0 NaN NaN \n", + "\n", + " n_keywords n_education n_employment \n", + "0000-0001-9097-2281 NaN NaN 2.0 \n", + "0000-0002-8614-3007 NaN NaN NaN \n", + "0000-0003-3728-6439 7.0 1.0 1.0 \n", + "0000-0001-8330-7443 NaN 4.0 2.0 \n", + "0000-0001-9670-515X 3.0 3.0 NaN \n", + "... ... ... ... \n", + "0000-0003-1204-6009 NaN 3.0 2.0 \n", + "0000-0002-3472-7668 NaN 2.0 1.0 \n", + "0000-0002-9602-0529 NaN NaN NaN \n", + "0000-0003-4402-5982 NaN 2.0 1.0 \n", + "0000-0002-1734-7241 5.0 NaN 1.0 \n", + "\n", + "[19692 rows x 30 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df[df['primary_email'].isna() & df['other_emails'].notna()]" + "df[df.primary_email.isna() & df.other_email_domains.notna()]" ] }, { @@ -894,52 +5326,447 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0001-7402-0096111NaNNaNNaNNaNNaNNaNNaNNaN[[, kth royal institute of technology, stockho...0NaN2015-01-11t15:13:06.467z2016-06-14t23:55:59.896z00000NaNNaN[kth.se]NaN1.0NaNNaNNaN1.0
0000-0001-8377-3508111NaNNaNNaN[fontana, milena da silva]NaN[educação; informática; matemática.]NaNNaN[[, instituto federal de educação, ciência e t...0NaN2018-05-23t23:39:04.534z2019-10-16t02:50:11.007z00000NaNNaN[cnpq.br]NaN1.0NaN1.0NaN3.0
0000-0002-2638-4108111NaNNaNinvestigador de la universidad de oviedo. depa...NaNNaN[constitutional law, history of political thou...[[scopus author id, 54394231000]][[public law, ph doctor, university of oviedo,...[[professor of constitutional law, university ...1[crossref]2013-03-25t14:38:06.016z2020-07-01t13:10:37.025z10000NaNNaN[unioviedo.es]NaN1.01.03.01.01.0
0000-0003-1435-6545111NaNNaNNaNNaNNaN[migration, culture cell, prostate cancer][[researcherid, p-2223-2018]][[morfologia, , universidade estadual paulista...[[, universidade estadual paulista (unesp), in...0NaN2018-08-09t12:12:24.405z2020-04-22t01:38:03.184z00000NaNNaN[cnpq.br, linkedin.com]NaN2.01.03.01.01.0
0000-0003-1284-9741111alex percy antoniomanriquez paisigNaNNaNNaNNaNNaNNaNNaN0NaN2020-09-08t20:04:33.906z2020-09-08t20:25:55.432z00000NaNNaN[youtube.com]NaN1.0NaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0001-7402-0096 1 1 1 \n", + "0000-0001-8377-3508 1 1 1 \n", + "0000-0002-2638-4108 1 1 1 \n", + "0000-0003-1435-6545 1 1 1 \n", + "0000-0003-1284-9741 1 1 1 \n", + "\n", + " given_names family_name \\\n", + "0000-0001-7402-0096 NaN NaN \n", + "0000-0001-8377-3508 NaN NaN \n", + "0000-0002-2638-4108 NaN NaN \n", + "0000-0003-1435-6545 NaN NaN \n", + "0000-0003-1284-9741 alex percy antonio manriquez paisig \n", + "\n", + " biography \\\n", + "0000-0001-7402-0096 NaN \n", + "0000-0001-8377-3508 NaN \n", + "0000-0002-2638-4108 investigador de la universidad de oviedo. depa... \n", + "0000-0003-1435-6545 NaN \n", + "0000-0003-1284-9741 NaN \n", + "\n", + " other_names primary_email \\\n", + "0000-0001-7402-0096 NaN NaN \n", + "0000-0001-8377-3508 [fontana, milena da silva] NaN \n", + "0000-0002-2638-4108 NaN NaN \n", + "0000-0003-1435-6545 NaN NaN \n", + "0000-0003-1284-9741 NaN NaN \n", + "\n", + " keywords \\\n", + "0000-0001-7402-0096 NaN \n", + "0000-0001-8377-3508 [educação; informática; matemática.] \n", + "0000-0002-2638-4108 [constitutional law, history of political thou... \n", + "0000-0003-1435-6545 [migration, culture cell, prostate cancer] \n", + "0000-0003-1284-9741 NaN \n", + "\n", + " external_ids \\\n", + "0000-0001-7402-0096 NaN \n", + "0000-0001-8377-3508 NaN \n", + "0000-0002-2638-4108 [[scopus author id, 54394231000]] \n", + "0000-0003-1435-6545 [[researcherid, p-2223-2018]] \n", + "0000-0003-1284-9741 NaN \n", + "\n", + " education \\\n", + "0000-0001-7402-0096 NaN \n", + "0000-0001-8377-3508 NaN \n", + "0000-0002-2638-4108 [[public law, ph doctor, university of oviedo,... \n", + "0000-0003-1435-6545 [[morfologia, , universidade estadual paulista... \n", + "0000-0003-1284-9741 NaN \n", + "\n", + " employment \\\n", + "0000-0001-7402-0096 [[, kth royal institute of technology, stockho... \n", + "0000-0001-8377-3508 [[, instituto federal de educação, ciência e t... \n", + "0000-0002-2638-4108 [[professor of constitutional law, university ... \n", + "0000-0003-1435-6545 [[, universidade estadual paulista (unesp), in... \n", + "0000-0003-1284-9741 NaN \n", + "\n", + " n_works works_source activation_date \\\n", + "0000-0001-7402-0096 0 NaN 2015-01-11t15:13:06.467z \n", + "0000-0001-8377-3508 0 NaN 2018-05-23t23:39:04.534z \n", + "0000-0002-2638-4108 1 [crossref] 2013-03-25t14:38:06.016z \n", + "0000-0003-1435-6545 0 NaN 2018-08-09t12:12:24.405z \n", + "0000-0003-1284-9741 0 NaN 2020-09-08t20:04:33.906z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc \\\n", + "0000-0001-7402-0096 2016-06-14t23:55:59.896z 0 0 0 \n", + "0000-0001-8377-3508 2019-10-16t02:50:11.007z 0 0 0 \n", + "0000-0002-2638-4108 2020-07-01t13:10:37.025z 1 0 0 \n", + "0000-0003-1435-6545 2020-04-22t01:38:03.184z 0 0 0 \n", + "0000-0003-1284-9741 2020-09-08t20:25:55.432z 0 0 0 \n", + "\n", + " n_other_pids label primary_email_domain \\\n", + "0000-0001-7402-0096 0 0 NaN \n", + "0000-0001-8377-3508 0 0 NaN \n", + "0000-0002-2638-4108 0 0 NaN \n", + "0000-0003-1435-6545 0 0 NaN \n", + "0000-0003-1284-9741 0 0 NaN \n", + "\n", + " other_email_domains url_domains n_emails \\\n", + "0000-0001-7402-0096 NaN [kth.se] NaN \n", + "0000-0001-8377-3508 NaN [cnpq.br] NaN \n", + "0000-0002-2638-4108 NaN [unioviedo.es] NaN \n", + "0000-0003-1435-6545 NaN [cnpq.br, linkedin.com] NaN \n", + "0000-0003-1284-9741 NaN [youtube.com] NaN \n", + "\n", + " n_urls n_ids n_keywords n_education n_employment \n", + "0000-0001-7402-0096 1.0 NaN NaN NaN 1.0 \n", + "0000-0001-8377-3508 1.0 NaN 1.0 NaN 3.0 \n", + "0000-0002-2638-4108 1.0 1.0 3.0 1.0 1.0 \n", + "0000-0003-1435-6545 2.0 1.0 3.0 1.0 1.0 \n", + "0000-0003-1284-9741 1.0 NaN NaN NaN NaN " + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "def extract_url_domains(lst):\n", - " domains = []\n", - " for e in lst:\n", - " # e[0] is a string describing the url\n", - " # e[1] is the url\n", - " domain = tldextract.extract(e[1])\n", - " domains.append(domain.registered_domain)\n", - " return domains" + "df[df.url_domains.notna()].head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], - "source": [ - "df['url_domains'] = df[df.urls.notna()]['urls'].apply(lambda x: extract_url_domains(x))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df[df['url_domains'].notna()].head()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df['n_urls'] = df['url_domains'].str.len()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidn_urls
0000-0002-1234-835X1219.0
0000-0001-7478-45391174.0
0000-0002-7392-37921169.0
0000-0002-6938-96381152.0
0000-0002-5710-40411114.0
.........
0000-0002-1686-19351NaN
0000-0002-3800-63311NaN
0000-0002-8783-58141NaN
0000-0002-7584-22831NaN
0000-0003-0529-35381NaN
\n", + "

10916574 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " orcid n_urls\n", + "0000-0002-1234-835X 1 219.0\n", + "0000-0001-7478-4539 1 174.0\n", + "0000-0002-7392-3792 1 169.0\n", + "0000-0002-6938-9638 1 152.0\n", + "0000-0002-5710-4041 1 114.0\n", + "... ... ...\n", + "0000-0002-1686-1935 1 NaN\n", + "0000-0002-3800-6331 1 NaN\n", + "0000-0002-8783-5814 1 NaN\n", + "0000-0002-7584-2283 1 NaN\n", + "0000-0003-0529-3538 1 NaN\n", + "\n", + "[10916574 rows x 2 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "urls_by_orcid = df.sort_values('n_urls', ascending=False)[['orcid', 'n_urls']]\n", "urls_by_orcid" @@ -947,14 +5774,1085 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "0000-0002-1234-835X", + "0000-0001-7478-4539", + "0000-0002-7392-3792", + "0000-0002-6938-9638", + "0000-0002-5710-4041", + "0000-0003-2450-090X", + "0000-0002-3920-7389", + "0000-0002-6689-4129", + "0000-0001-5384-0001", + "0000-0002-4621-5571", + "0000-0002-7754-8889", + "0000-0001-9131-1266", + "0000-0002-9025-8632", + "0000-0002-5250-1144", + "0000-0003-0321-7339", + "0000-0003-0176-1293", + "0000-0002-7456-3848", + "0000-0002-8493-0402", + "0000-0002-9965-2425", + "0000-0001-8873-6677", + "0000-0002-3997-5070", + "0000-0002-1856-6905", + "0000-0002-4062-3603", + "0000-0002-4316-1467", + "0000-0003-1524-6268", + "0000-0002-0752-7513", + "0000-0001-5880-7091", + "0000-0003-0594-2462", + "0000-0002-1298-5252", + "0000-0003-2593-7134", + "0000-0003-2383-8386", + "0000-0003-1761-3842", + "0000-0003-3546-2312", + "0000-0002-2886-9248", + "0000-0003-4948-9268", + "0000-0003-2183-8112", + "0000-0002-1929-6054", + "0000-0003-2407-3557", + "0000-0002-7568-3403", + "0000-0003-0796-0234", + "0000-0002-9276-6921", + "0000-0002-4305-4215", + "0000-0003-1484-6958", + "0000-0001-7133-6896", + "0000-0002-4004-6666", + "0000-0002-8208-0897", + "0000-0003-0930-6121", + "0000-0003-4993-5555", + "0000-0002-9071-5450", + "0000-0002-8116-9611", + "0000-0002-3277-9659", + "0000-0001-9559-1103", + "0000-0002-8122-879X", + "0000-0002-2000-8339", + "0000-0003-2862-6315", + "0000-0002-6547-0172", + "0000-0003-4808-6619", + "0000-0002-6254-8683", + "0000-0002-5139-2660", + "0000-0001-5300-4601", + "0000-0002-0971-9375", + "0000-0003-3933-0229", + "0000-0003-1585-1134", + "0000-0003-0694-1154", + "0000-0002-4659-5391", + "0000-0001-6783-2037", + "0000-0001-6461-2573", + "0000-0003-4501-3756", + "0000-0002-2916-2893", + "0000-0001-5549-6822", + "0000-0003-4326-9336", + "0000-0001-8096-4333", + "0000-0002-8940-3177", + "0000-0001-8978-4830", + "0000-0002-6680-1703", + "0000-0002-8593-9257", + "0000-0002-5946-1595", + "0000-0002-7653-4899", + "0000-0002-5196-4905", + "0000-0003-1904-4188", + "0000-0001-8808-4867", + "0000-0001-6921-0426", + "0000-0003-1815-1993", + "0000-0002-7843-8497", + "0000-0003-1675-2840", + "0000-0001-8644-2114", + "0000-0001-8986-2528", + "0000-0001-7784-0583", + "0000-0003-0907-9870", + "0000-0002-5265-6074", + "0000-0001-7550-5802", + "0000-0002-7179-6953", + "0000-0002-3334-9386", + "0000-0001-9102-8639", + "0000-0002-0696-8560", + "0000-0001-6979-4273", + "0000-0001-7193-5039", + "0000-0001-6714-009X", + "0000-0002-9771-600X", + "0000-0001-7608-9433" + ], + "y": [ + 219, + 174, + 169, + 152, + 114, + 114, + 111, + 104, + 104, + 90, + 83, + 83, + 81, + 81, + 80, + 80, + 80, + 76, + 73, + 72, + 71, + 70, + 69, + 69, + 68, + 68, + 68, + 68, + 67, + 67, + 66, + 66, + 65, + 64, + 61, + 61, + 61, + 59, + 57, + 57, + 57, + 57, + 57, + 57, + 57, + 56, + 55, + 55, + 55, + 55, + 50, + 50, + 50, + 49, + 49, + 48, + 48, + 48, + 48, + 48, + 47, + 47, + 46, + 46, + 46, + 45, + 45, + 45, + 45, + 44, + 43, + 43, + 43, + 43, + 42, + 42, + 42, + 41, + 41, + 41, + 40, + 40, + 39, + 39, + 39, + 39, + 38, + 38, + 38, + 38, + 38, + 37, + 37, + 37, + 37, + 37, + 36, + 36, + 36, + 36 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 100 ORCID iDs with URLs" + }, + "xaxis": { + "range": [ + -0.5, + 99.5 + ], + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(100)\n", "data = [\n", " go.Bar(\n", - " x=urls_by_orcid[:TOP_N]['orcid'],\n", + " x=urls_by_orcid[:TOP_N].index,\n", " y=urls_by_orcid[:TOP_N]['n_urls']\n", " )\n", "]\n", @@ -969,7 +6867,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ @@ -983,9 +6881,980 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "linkedin.com", + "researchgate.net", + "google.com", + "cnpq.br", + "academia.edu", + "twitter.com", + "facebook.com", + "publons.com", + "wordpress.com", + "mendeley.com", + "instagram.com", + "github.io", + "google.com.ua", + "blogspot.com", + "github.com", + "google.es", + "helsinki.fi", + "unirioja.es", + "youtube.com", + "wixsite.com", + "ku.dk", + "scopus.com", + "", + "weebly.com", + "us.es", + "kth.se", + "cityu.edu.hk", + "kcl.ac.uk", + "au.dk", + "man.ac.uk", + "google.com.au", + "ucl.ac.uk", + "sdu.dk", + "ugr.es", + "researcherid.com", + "mq.edu.au", + "ntu.edu.tw", + "rug.nl", + "colciencias.gov.co", + "google.co.in", + "dtu.dk", + "bris.ac.uk", + "uwa.edu.au", + "bu.edu", + "uc3m.es", + "vub.be", + "monash.edu", + "google.co.uk", + "aau.dk", + "lancs.ac.uk" + ], + "y": [ + 77558, + 67357, + 44397, + 24439, + 21054, + 18771, + 15121, + 10622, + 8996, + 6978, + 5881, + 5479, + 5335, + 5240, + 5199, + 5134, + 4711, + 4572, + 4396, + 4120, + 3756, + 3558, + 3494, + 3115, + 3034, + 2952, + 2793, + 2720, + 2717, + 2693, + 2606, + 2585, + 2465, + 2224, + 2133, + 2131, + 2093, + 1940, + 1927, + 1904, + 1880, + 1838, + 1808, + 1805, + 1803, + 1788, + 1772, + 1652, + 1652, + 1648 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top-50 URL domains" + }, + "xaxis": { + "range": [ + -0.5, + 49.5 + ], + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(50)\n", "data = [\n", @@ -1012,33 +7881,2036 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-5710-4041111ryszardromaniukprofessor of electronics and communications en...[r.romaniuk, r.s.romaniuk, ryszard romaniuk, r...rrom@ise.pw.edu.pl[measurement systems, telecommunications, rese...[[isni, 0000000071432485], [researcherid, b-91...[[faculty of electronics and information techn...[[professor, institute director, politechnika ...5008[inspire-hep, researcherid, isni2orcid search ...2013-01-20t12:09:21.600z2021-03-11t20:57:13.284z122125017420ise.pw.edu.pl[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch][google.pl, publons.com, scopus.com, mendeley....3.0114.03.05.01.01.0
0000-0002-1929-6054111franklin américocanaza choquedocente-investigador social. maestrando en der...[franklin américo canaza-choque , franklin a. ...leo_123fa@hotmail.com[justicia global; democracia; derechos humanos...[[researcherid, p-8613-2018], [loop profile, 8...[[facultad de ciencias de la educación , maest...[[investigador social, universidad católica de...38[researcherid, base - bielefeld academic searc...2017-09-15t19:45:43.483z2021-03-14t20:20:21.282z2900330hotmail.com[gmail.com, gmail.com, hotmail.com, baldwin.ed...[concytec.gob.pe, redalyc.org, redalyc.org, un...5.061.04.02.01.01.0
0000-0003-2407-3557111abdulazizabdul aziz was born on may 25, 1973, in brebes...[abdul aziz, aziz, abdul, aziz, a., aziz, abd,...NaN[etika bisnis islam, ekonomi islam, ilmu ekono...NaN[[ilmu ekonomi, dr, universitas borobudur, jak...[[assisten professor/dr, institut agama islam ...72[base - bielefeld academic search engine, abdu...2016-09-12t04:41:24.842z2021-01-26t11:58:33.039z1900770NaNNaN[google.com, syekhnurjati.ac.id, orcid.org, bl...NaN59.0NaN4.03.01.0
0000-0002-3997-5070111dr. parameshacharib ddr. parameshachari b dacm distinguished speake...[dr. parameshachari b d]NaN[honorary secretary| iete mysuru centre, profe...[[researcherid, f-7045-2018], [scopus author i...[[electronics and communication engineering, p...[[acm distinguished speaker (volunteer), assoc...93[publons, multidisciplinary digital publishing...2016-08-24t11:00:30.403z2021-03-14t07:11:09.817z4700480NaNNaN[geethashishu.in, geethashishu.in, acm.org, go...NaN71.03.06.05.010.0
0000-0003-2450-090X111eduardbabulakprofessor eduard babulak is accomplished inter...[professor eduard babulak]NaN[internet of things, computer networking, inte...[[scopus author id, 6506867432], [researcherid...[[information technology, doctor habilitated (...[[consultant, horizon 2020 framework programme...274[the lens, base - bielefeld academic search en...2013-04-03t08:02:30.013z2021-02-28t10:07:13.231z199011740NaNNaN[worldassessmentcouncil.org, spseke.sk, bcs.or...NaN114.05.08.06.022.0
0000-0003-2593-7134111aanjaelaniall my papers can be downloaded from portal:re...[jaelani, a., jaelani, aan]aan_jaelani@syekhnurjati.ac.id[islamic economics, history of islamic economi...[[scopus author id, 57195963463], [loop profil...[[post graduate, s3/dr, universitas islam nege...[[dr, institut agama islam negeri syekh nurjat...79[publons, aan jaelani, scopus - elsevier, dime...2016-03-02t18:37:44.989z2021-03-08t03:42:22.593z88001930syekhnurjati.ac.id[gmail.com][microsoft.com, twitter.com, academia.edu, aca...1.067.04.07.02.01.0
0000-0002-3920-7389111а.гусевsurname, name gusev alexander leonidovichdate...[alexander l. gusev , alexander leonidovich gu...NaN[technologies of isotope separation, 3d - prin...[[researcherid, f-8048-2014], [scopus author i...[[chemical technology and cryogenic-vacuum tec...[[general director, scientific technical centr...472[publons, datacite, scopus - elsevier, a.l. gu...2014-05-14t00:01:28.030z2021-01-16t13:44:14.134z3700210NaNNaN[youtube.com, isjaee.com, researchgate.net, re...NaN111.02.016.02.07.0
0000-0003-4948-9268111gustavoduperrégustavo norberto duperré graduated in arts and...[gustavo norberto duperré, duperré, g. n., gus...gustavo.duperre@usal.edu.ar[computer science, medieval and modern history...[[scopus author id, 57195936346], [researcheri...[[programme in history, history of art and ter...[[titular professor, dirección general de cult...41[gustavo duperré, scopus - elsevier, publons, ...2020-02-22t15:49:52.386z2021-03-12t15:13:44.065z1300340usal.edu.arNaN[icomos.ro, unirioja.es, unirioja.es, unc.edu....NaN61.02.011.06.05.0
0000-0003-2183-8112111pelayo munhozoleapós-doutorado em gestão ambiental pela univers...[ munhoz, pelayo olea, olea, pelayo, olea, p...NaN[inovação, empreendedorismo, sustentabilidade][[scopus author id, 55175503300], [researcheri...[[, postdoctoral in environmental sustainabili...[[professor, universidade federal do rio grand...1108[the lens, pelayo munhoz olea, dimensions, bas...2013-02-04t17:25:34.723z2021-03-10t14:05:17.770z797015820NaNNaN[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...NaN61.02.03.07.09.0
0000-0002-6938-9638111adolfocatral sanabriamy education is in computer science, mathemati...NaNNaNNaN[[loop profile, 747193]][[education, capacitación para la enseñanza en...NaN2023[base - bielefeld academic search engine, data...2019-05-07t19:27:02.210z2020-12-10t23:39:15.236z202200160NaNNaN[researchgate.net, youtube.com, linkedin.com, ...NaN152.01.0NaN6.0NaN
0000-0002-9025-8632111buycannabisdispensarywe procure and deliver premium cannabis strain...[we procure and deliver premium cannabis strai...NaN[cannabis community, cannabis culture, marijua...NaNNaNNaN10[goowonderland dispensary]2020-12-09t21:19:46.004z2020-12-10t01:17:28.772z00000NaNNaN[goowonderland.com, goowonderland.com, goowond...NaN81.0NaN7.0NaNNaN
0000-0002-9965-2425111jaroslawspychalajaroslaw spychala has received a doctoral degr...[jaroslaw jozef spychala]NaN[photochemistry, organic chemistry, biochemist...[[scopus author id, 7006745874]][[department of chemistry, postdoctoral associ...[[assistant professor, adam mickiewicz univers...29[scopus - elsevier]2014-09-18t12:34:14.242z2020-02-11t14:31:25.544z1500290NaNNaN[biowebspin.com, biowebspin.com, google.com, l...NaN73.01.04.04.02.0
0000-0002-4062-3603111juan de diosbeltrán mancillajuan de dios beltrán mancilla (*) filósofo aut...[juan de dios beltrán mancilla, filósofo autod...NaN[filosofia medicina arquitectura economía dere...NaN[[, diplomado en practicas directivas para or...[[inspector general jornada vespertina // de 2...11[juan de dios beltr´´án mancilla]2020-04-19t21:06:33.495z2021-02-10t20:13:07.698z00070NaNNaN[yumpu.com, ijopm.org, google.com, blogspot.co...NaN69.0NaN1.08.06.0
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-5710-4041 1 1 1 \n", + "0000-0002-1929-6054 1 1 1 \n", + "0000-0003-2407-3557 1 1 1 \n", + "0000-0002-3997-5070 1 1 1 \n", + "0000-0003-2450-090X 1 1 1 \n", + "0000-0003-2593-7134 1 1 1 \n", + "0000-0002-3920-7389 1 1 1 \n", + "0000-0003-4948-9268 1 1 1 \n", + "0000-0003-2183-8112 1 1 1 \n", + "0000-0002-6938-9638 1 1 1 \n", + "0000-0002-9025-8632 1 1 1 \n", + "0000-0002-9965-2425 1 1 1 \n", + "0000-0002-4062-3603 1 1 1 \n", + "\n", + " given_names family_name \\\n", + "0000-0002-5710-4041 ryszard romaniuk \n", + "0000-0002-1929-6054 franklin américo canaza choque \n", + "0000-0003-2407-3557 abdul aziz \n", + "0000-0002-3997-5070 dr. parameshachari b d \n", + "0000-0003-2450-090X eduard babulak \n", + "0000-0003-2593-7134 aan jaelani \n", + "0000-0002-3920-7389 а. гусев \n", + "0000-0003-4948-9268 gustavo duperré \n", + "0000-0003-2183-8112 pelayo munhoz olea \n", + "0000-0002-6938-9638 adolfo catral sanabria \n", + "0000-0002-9025-8632 buycannabis dispensary \n", + "0000-0002-9965-2425 jaroslaw spychala \n", + "0000-0002-4062-3603 juan de dios beltrán mancilla \n", + "\n", + " biography \\\n", + "0000-0002-5710-4041 professor of electronics and communications en... \n", + "0000-0002-1929-6054 docente-investigador social. maestrando en der... \n", + "0000-0003-2407-3557 abdul aziz was born on may 25, 1973, in brebes... \n", + "0000-0002-3997-5070 dr. parameshachari b dacm distinguished speake... \n", + "0000-0003-2450-090X professor eduard babulak is accomplished inter... \n", + "0000-0003-2593-7134 all my papers can be downloaded from portal:re... \n", + "0000-0002-3920-7389 surname, name gusev alexander leonidovichdate... \n", + "0000-0003-4948-9268 gustavo norberto duperré graduated in arts and... \n", + "0000-0003-2183-8112 pós-doutorado em gestão ambiental pela univers... \n", + "0000-0002-6938-9638 my education is in computer science, mathemati... \n", + "0000-0002-9025-8632 we procure and deliver premium cannabis strain... \n", + "0000-0002-9965-2425 jaroslaw spychala has received a doctoral degr... \n", + "0000-0002-4062-3603 juan de dios beltrán mancilla (*) filósofo aut... \n", + "\n", + " other_names \\\n", + "0000-0002-5710-4041 [r.romaniuk, r.s.romaniuk, ryszard romaniuk, r... \n", + "0000-0002-1929-6054 [franklin américo canaza-choque , franklin a. ... \n", + "0000-0003-2407-3557 [abdul aziz, aziz, abdul, aziz, a., aziz, abd,... \n", + "0000-0002-3997-5070 [dr. parameshachari b d] \n", + "0000-0003-2450-090X [professor eduard babulak] \n", + "0000-0003-2593-7134 [jaelani, a., jaelani, aan] \n", + "0000-0002-3920-7389 [alexander l. gusev , alexander leonidovich gu... \n", + "0000-0003-4948-9268 [gustavo norberto duperré, duperré, g. n., gus... \n", + "0000-0003-2183-8112 [ munhoz, pelayo olea, olea, pelayo, olea, p... \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 [we procure and deliver premium cannabis strai... \n", + "0000-0002-9965-2425 [jaroslaw jozef spychala] \n", + "0000-0002-4062-3603 [juan de dios beltrán mancilla, filósofo autod... \n", + "\n", + " primary_email \\\n", + "0000-0002-5710-4041 rrom@ise.pw.edu.pl \n", + "0000-0002-1929-6054 leo_123fa@hotmail.com \n", + "0000-0003-2407-3557 NaN \n", + "0000-0002-3997-5070 NaN \n", + "0000-0003-2450-090X NaN \n", + "0000-0003-2593-7134 aan_jaelani@syekhnurjati.ac.id \n", + "0000-0002-3920-7389 NaN \n", + "0000-0003-4948-9268 gustavo.duperre@usal.edu.ar \n", + "0000-0003-2183-8112 NaN \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 NaN \n", + "0000-0002-4062-3603 NaN \n", + "\n", + " keywords \\\n", + "0000-0002-5710-4041 [measurement systems, telecommunications, rese... \n", + "0000-0002-1929-6054 [justicia global; democracia; derechos humanos... \n", + "0000-0003-2407-3557 [etika bisnis islam, ekonomi islam, ilmu ekono... \n", + "0000-0002-3997-5070 [honorary secretary| iete mysuru centre, profe... \n", + "0000-0003-2450-090X [internet of things, computer networking, inte... \n", + "0000-0003-2593-7134 [islamic economics, history of islamic economi... \n", + "0000-0002-3920-7389 [technologies of isotope separation, 3d - prin... \n", + "0000-0003-4948-9268 [computer science, medieval and modern history... \n", + "0000-0003-2183-8112 [inovação, empreendedorismo, sustentabilidade] \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 [cannabis community, cannabis culture, marijua... \n", + "0000-0002-9965-2425 [photochemistry, organic chemistry, biochemist... \n", + "0000-0002-4062-3603 [filosofia medicina arquitectura economía dere... \n", + "\n", + " external_ids \\\n", + "0000-0002-5710-4041 [[isni, 0000000071432485], [researcherid, b-91... \n", + "0000-0002-1929-6054 [[researcherid, p-8613-2018], [loop profile, 8... \n", + "0000-0003-2407-3557 NaN \n", + "0000-0002-3997-5070 [[researcherid, f-7045-2018], [scopus author i... \n", + "0000-0003-2450-090X [[scopus author id, 6506867432], [researcherid... \n", + "0000-0003-2593-7134 [[scopus author id, 57195963463], [loop profil... \n", + "0000-0002-3920-7389 [[researcherid, f-8048-2014], [scopus author i... \n", + "0000-0003-4948-9268 [[scopus author id, 57195936346], [researcheri... \n", + "0000-0003-2183-8112 [[scopus author id, 55175503300], [researcheri... \n", + "0000-0002-6938-9638 [[loop profile, 747193]] \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 [[scopus author id, 7006745874]] \n", + "0000-0002-4062-3603 NaN \n", + "\n", + " education \\\n", + "0000-0002-5710-4041 [[faculty of electronics and information techn... \n", + "0000-0002-1929-6054 [[facultad de ciencias de la educación , maest... \n", + "0000-0003-2407-3557 [[ilmu ekonomi, dr, universitas borobudur, jak... \n", + "0000-0002-3997-5070 [[electronics and communication engineering, p... \n", + "0000-0003-2450-090X [[information technology, doctor habilitated (... \n", + "0000-0003-2593-7134 [[post graduate, s3/dr, universitas islam nege... \n", + "0000-0002-3920-7389 [[chemical technology and cryogenic-vacuum tec... \n", + "0000-0003-4948-9268 [[programme in history, history of art and ter... \n", + "0000-0003-2183-8112 [[, postdoctoral in environmental sustainabili... \n", + "0000-0002-6938-9638 [[education, capacitación para la enseñanza en... \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 [[department of chemistry, postdoctoral associ... \n", + "0000-0002-4062-3603 [[, diplomado en practicas directivas para or... \n", + "\n", + " employment \\\n", + "0000-0002-5710-4041 [[professor, institute director, politechnika ... \n", + "0000-0002-1929-6054 [[investigador social, universidad católica de... \n", + "0000-0003-2407-3557 [[assisten professor/dr, institut agama islam ... \n", + "0000-0002-3997-5070 [[acm distinguished speaker (volunteer), assoc... \n", + "0000-0003-2450-090X [[consultant, horizon 2020 framework programme... \n", + "0000-0003-2593-7134 [[dr, institut agama islam negeri syekh nurjat... \n", + "0000-0002-3920-7389 [[general director, scientific technical centr... \n", + "0000-0003-4948-9268 [[titular professor, dirección general de cult... \n", + "0000-0003-2183-8112 [[professor, universidade federal do rio grand... \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 [[assistant professor, adam mickiewicz univers... \n", + "0000-0002-4062-3603 [[inspector general jornada vespertina // de 2... \n", + "\n", + " n_works \\\n", + "0000-0002-5710-4041 5008 \n", + "0000-0002-1929-6054 38 \n", + "0000-0003-2407-3557 72 \n", + "0000-0002-3997-5070 93 \n", + "0000-0003-2450-090X 274 \n", + "0000-0003-2593-7134 79 \n", + "0000-0002-3920-7389 472 \n", + "0000-0003-4948-9268 41 \n", + "0000-0003-2183-8112 1108 \n", + "0000-0002-6938-9638 2023 \n", + "0000-0002-9025-8632 10 \n", + "0000-0002-9965-2425 29 \n", + "0000-0002-4062-3603 11 \n", + "\n", + " works_source \\\n", + "0000-0002-5710-4041 [inspire-hep, researcherid, isni2orcid search ... \n", + "0000-0002-1929-6054 [researcherid, base - bielefeld academic searc... \n", + "0000-0003-2407-3557 [base - bielefeld academic search engine, abdu... \n", + "0000-0002-3997-5070 [publons, multidisciplinary digital publishing... \n", + "0000-0003-2450-090X [the lens, base - bielefeld academic search en... \n", + "0000-0003-2593-7134 [publons, aan jaelani, scopus - elsevier, dime... \n", + "0000-0002-3920-7389 [publons, datacite, scopus - elsevier, a.l. gu... \n", + "0000-0003-4948-9268 [gustavo duperré, scopus - elsevier, publons, ... \n", + "0000-0003-2183-8112 [the lens, pelayo munhoz olea, dimensions, bas... \n", + "0000-0002-6938-9638 [base - bielefeld academic search engine, data... \n", + "0000-0002-9025-8632 [goowonderland dispensary] \n", + "0000-0002-9965-2425 [scopus - elsevier] \n", + "0000-0002-4062-3603 [juan de dios beltr´´án mancilla] \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0002-5710-4041 2013-01-20t12:09:21.600z 2021-03-11t20:57:13.284z \n", + "0000-0002-1929-6054 2017-09-15t19:45:43.483z 2021-03-14t20:20:21.282z \n", + "0000-0003-2407-3557 2016-09-12t04:41:24.842z 2021-01-26t11:58:33.039z \n", + "0000-0002-3997-5070 2016-08-24t11:00:30.403z 2021-03-14t07:11:09.817z \n", + "0000-0003-2450-090X 2013-04-03t08:02:30.013z 2021-02-28t10:07:13.231z \n", + "0000-0003-2593-7134 2016-03-02t18:37:44.989z 2021-03-08t03:42:22.593z \n", + "0000-0002-3920-7389 2014-05-14t00:01:28.030z 2021-01-16t13:44:14.134z \n", + "0000-0003-4948-9268 2020-02-22t15:49:52.386z 2021-03-12t15:13:44.065z \n", + "0000-0003-2183-8112 2013-02-04t17:25:34.723z 2021-03-10t14:05:17.770z \n", + "0000-0002-6938-9638 2019-05-07t19:27:02.210z 2020-12-10t23:39:15.236z \n", + "0000-0002-9025-8632 2020-12-09t21:19:46.004z 2020-12-10t01:17:28.772z \n", + "0000-0002-9965-2425 2014-09-18t12:34:14.242z 2020-02-11t14:31:25.544z \n", + "0000-0002-4062-3603 2020-04-19t21:06:33.495z 2021-02-10t20:13:07.698z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0002-5710-4041 1221 25 0 1742 0 \n", + "0000-0002-1929-6054 29 0 0 33 0 \n", + "0000-0003-2407-3557 19 0 0 77 0 \n", + "0000-0002-3997-5070 47 0 0 48 0 \n", + "0000-0003-2450-090X 199 0 1 174 0 \n", + "0000-0003-2593-7134 88 0 0 193 0 \n", + "0000-0002-3920-7389 37 0 0 21 0 \n", + "0000-0003-4948-9268 13 0 0 34 0 \n", + "0000-0003-2183-8112 797 0 1 582 0 \n", + "0000-0002-6938-9638 2022 0 0 16 0 \n", + "0000-0002-9025-8632 0 0 0 0 0 \n", + "0000-0002-9965-2425 15 0 0 29 0 \n", + "0000-0002-4062-3603 0 0 0 7 0 \n", + "\n", + " primary_email_domain \\\n", + "0000-0002-5710-4041 ise.pw.edu.pl \n", + "0000-0002-1929-6054 hotmail.com \n", + "0000-0003-2407-3557 NaN \n", + "0000-0002-3997-5070 NaN \n", + "0000-0003-2450-090X NaN \n", + "0000-0003-2593-7134 syekhnurjati.ac.id \n", + "0000-0002-3920-7389 NaN \n", + "0000-0003-4948-9268 usal.edu.ar \n", + "0000-0003-2183-8112 NaN \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 NaN \n", + "0000-0002-4062-3603 NaN \n", + "\n", + " other_email_domains \\\n", + "0000-0002-5710-4041 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] \n", + "0000-0002-1929-6054 [gmail.com, gmail.com, hotmail.com, baldwin.ed... \n", + "0000-0003-2407-3557 NaN \n", + "0000-0002-3997-5070 NaN \n", + "0000-0003-2450-090X NaN \n", + "0000-0003-2593-7134 [gmail.com] \n", + "0000-0002-3920-7389 NaN \n", + "0000-0003-4948-9268 NaN \n", + "0000-0003-2183-8112 NaN \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 NaN \n", + "0000-0002-4062-3603 NaN \n", + "\n", + " url_domains \\\n", + "0000-0002-5710-4041 [google.pl, publons.com, scopus.com, mendeley.... \n", + "0000-0002-1929-6054 [concytec.gob.pe, redalyc.org, redalyc.org, un... \n", + "0000-0003-2407-3557 [google.com, syekhnurjati.ac.id, orcid.org, bl... \n", + "0000-0002-3997-5070 [geethashishu.in, geethashishu.in, acm.org, go... \n", + "0000-0003-2450-090X [worldassessmentcouncil.org, spseke.sk, bcs.or... \n", + "0000-0003-2593-7134 [microsoft.com, twitter.com, academia.edu, aca... \n", + "0000-0002-3920-7389 [youtube.com, isjaee.com, researchgate.net, re... \n", + "0000-0003-4948-9268 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... \n", + "0000-0003-2183-8112 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... \n", + "0000-0002-6938-9638 [researchgate.net, youtube.com, linkedin.com, ... \n", + "0000-0002-9025-8632 [goowonderland.com, goowonderland.com, goowond... \n", + "0000-0002-9965-2425 [biowebspin.com, biowebspin.com, google.com, l... \n", + "0000-0002-4062-3603 [yumpu.com, ijopm.org, google.com, blogspot.co... \n", + "\n", + " n_emails n_urls n_ids n_keywords n_education \\\n", + "0000-0002-5710-4041 3.0 114.0 3.0 5.0 1.0 \n", + "0000-0002-1929-6054 5.0 61.0 4.0 2.0 1.0 \n", + "0000-0003-2407-3557 NaN 59.0 NaN 4.0 3.0 \n", + "0000-0002-3997-5070 NaN 71.0 3.0 6.0 5.0 \n", + "0000-0003-2450-090X NaN 114.0 5.0 8.0 6.0 \n", + "0000-0003-2593-7134 1.0 67.0 4.0 7.0 2.0 \n", + "0000-0002-3920-7389 NaN 111.0 2.0 16.0 2.0 \n", + "0000-0003-4948-9268 NaN 61.0 2.0 11.0 6.0 \n", + "0000-0003-2183-8112 NaN 61.0 2.0 3.0 7.0 \n", + "0000-0002-6938-9638 NaN 152.0 1.0 NaN 6.0 \n", + "0000-0002-9025-8632 NaN 81.0 NaN 7.0 NaN \n", + "0000-0002-9965-2425 NaN 73.0 1.0 4.0 4.0 \n", + "0000-0002-4062-3603 NaN 69.0 NaN 1.0 8.0 \n", + "\n", + " n_employment \n", + "0000-0002-5710-4041 1.0 \n", + "0000-0002-1929-6054 1.0 \n", + "0000-0003-2407-3557 1.0 \n", + "0000-0002-3997-5070 10.0 \n", + "0000-0003-2450-090X 22.0 \n", + "0000-0003-2593-7134 1.0 \n", + "0000-0002-3920-7389 7.0 \n", + "0000-0003-4948-9268 5.0 \n", + "0000-0003-2183-8112 9.0 \n", + "0000-0002-6938-9638 NaN \n", + "0000-0002-9025-8632 NaN \n", + "0000-0002-9965-2425 2.0 \n", + "0000-0002-4062-3603 6.0 " + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[(df['url_domains'].str.len() > 50) & (df['n_works'] > 0)]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-3505-2797111nurulmalahayatigoogle scholarNaNNaNNaN[[researcherid, q-3861-2017]][[civil and transportation engineering , maste...[[senior lecturer, universitas syiah kuala, ba...6[nurul malahayati]2017-10-01t00:46:31.324z2019-08-19t15:52:47.253z30030NaNNaN[google.com, ristekdikti.go.id, unsyiah.ac.id,...NaN16.01.0NaN2.01.0
0000-0003-3670-9620111carlosbarreraim individual inventor, and this is my work; s...[retrodynamic, novelinflow]NaN[gearturbine, mechanical, power, innovation, t...[[loop profile, 394457]]NaNNaN1[carlos barrera]2016-08-29t20:32:10.362z2021-02-09t04:56:35.554z00000NaNNaN[blogspot.mx, behance.net, authorstream.com, d...NaN24.01.08.0NaNNaN
0000-0002-5441-0465111nuriahernández-leónNaN[nuria h. león, nuria hernández león, hernánde...NaN[marketing, research, human resources, busines...NaN[[, course: social skills, university of salam...[[merchandise reception and expedition trainer...11[nuria hernández-león]2015-11-28t07:18:58.442z2021-03-05t16:37:47.403z10040NaNNaN[feriaempresamujer.com, escueladenegociosydire...NaN16.0NaN7.019.016.0
0000-0002-7781-6767111mohd nazriismailborn in penang, malaysia in 1971, dr. mohd had...[ndum (national defence university of malaysia)]NaN[network communication, manet, wsn, network se...[[scopus author id, 24372977800], [researcheri...NaN[[lecturer, universiti pertahanan nasional mal...35[scopus - elsevier]2016-09-06t02:25:52.974z2020-10-20t06:55:55.051z2400350NaNNaN[google.com.my, researchgate.net, academia.edu...NaN16.02.010.0NaN4.0
0000-0001-7010-2908111clarasarmentoclara sarmento holds an aggregation in cultura...NaNNaN[ethnography, tourism and business, anglo-amer...[[ciência id, d418-d6f8-7d49]][[ao abrigo da bolsa santander ie best practic...[[presidente da comissão de acreditação do nov...275[clara sarmento]2013-12-12t00:33:58.190z2020-10-12t14:43:00.749z1700600NaNNaN[iscap.pt, google.pt, academia.edu, researchga...NaN13.01.06.08.037.0
.............................................................................................
0000-0002-9446-9496111jesúsportillo-fernándezba in philosophy, ba in humanities, ph.d. in p...[jesús portillo fernández, portillo-fernández,...NaN[absurdo, lingüística, pragmática, filosofía d...[[scopus author id, 55229372800]][[, doctor en filología, universidad de sevill...[[, grupo de investigación en lógica, lenguaje...35[jesús portillo-fernández]2015-03-08t20:37:16.590z2021-03-12t22:05:28.976z00000NaNNaN[us.es, us.es, us.es, google.es, microsoft.com...NaN12.01.05.05.01.0
0000-0003-0579-5829111ángelcarrión-tavárezNaN[ángel carrión tavárez, á carrión tavárez, ác ...NaN[editing and publishing, sociomusicology, geog...[[loop profile, 687295]][[integration and economic and territorial dev...[[director, university of puerto rico at río p...132[ángel carrión-tavárez]2017-12-30t19:25:41.566z2021-03-13t23:21:59.069z1300280NaNNaN[academia.edu, redalyc.org, directorioexit.inf...NaN11.01.06.04.03.0
0000-0001-8960-9004111susanbastaniNaN[s. bastani, سوسن باستانی]sbastani@alzahra.ac.ir[social networks, online and offline communiti...[[scopus author id, 16642098400]][[sociology, ph.d., university of toronto, tor...[[professor, alzahra university, tehran, vanak...20[scopus - elsevier]2019-07-10t06:50:46.255z2020-10-07t04:08:01.961z1900330alzahra.ac.ir[gmail.com, gmail.com][scopus.com, google.com, publons.com, zenodo.o...2.011.01.04.03.04.0
0000-0002-4379-6454111caroline wanjirukariukicaroline holds a phd in economics from curtin ...NaNNaN[applied economics, financial economics, appli...NaN[[economics, doctor of philosophy , curtin uni...[[director, educational development, strathmor...4[caroline wanjiru kariuki]2020-03-18t10:18:04.007z2021-02-11t14:40:38.515z10000NaNNaN[scopus.com, mendeley.com, publons.com, resear...NaN13.0NaN4.03.06.0
0000-0003-2311-0600111myokyaw hlaingNaN[dr myo kyaw hlaing]NaN[economic geology]NaNNaN[[lecturer, union of myanmar ministry of educa...2[myo kyaw hlaing]2018-12-26t12:51:57.801z2021-01-26t14:36:47.421z10020NaNNaN[facebook.com, linkedin.com, instagram.com, re...NaN12.0NaN1.0NaN2.0
\n", + "

141 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-3505-2797 1 1 1 \n", + "0000-0003-3670-9620 1 1 1 \n", + "0000-0002-5441-0465 1 1 1 \n", + "0000-0002-7781-6767 1 1 1 \n", + "0000-0001-7010-2908 1 1 1 \n", + "... ... ... ... \n", + "0000-0002-9446-9496 1 1 1 \n", + "0000-0003-0579-5829 1 1 1 \n", + "0000-0001-8960-9004 1 1 1 \n", + "0000-0002-4379-6454 1 1 1 \n", + "0000-0003-2311-0600 1 1 1 \n", + "\n", + " given_names family_name \\\n", + "0000-0002-3505-2797 nurul malahayati \n", + "0000-0003-3670-9620 carlos barrera \n", + "0000-0002-5441-0465 nuria hernández-león \n", + "0000-0002-7781-6767 mohd nazri ismail \n", + "0000-0001-7010-2908 clara sarmento \n", + "... ... ... \n", + "0000-0002-9446-9496 jesús portillo-fernández \n", + "0000-0003-0579-5829 ángel carrión-tavárez \n", + "0000-0001-8960-9004 susan bastani \n", + "0000-0002-4379-6454 caroline wanjiru kariuki \n", + "0000-0003-2311-0600 myo kyaw hlaing \n", + "\n", + " biography \\\n", + "0000-0002-3505-2797 google scholar \n", + "0000-0003-3670-9620 im individual inventor, and this is my work; s... \n", + "0000-0002-5441-0465 NaN \n", + "0000-0002-7781-6767 born in penang, malaysia in 1971, dr. mohd had... \n", + "0000-0001-7010-2908 clara sarmento holds an aggregation in cultura... \n", + "... ... \n", + "0000-0002-9446-9496 ba in philosophy, ba in humanities, ph.d. in p... \n", + "0000-0003-0579-5829 NaN \n", + "0000-0001-8960-9004 NaN \n", + "0000-0002-4379-6454 caroline holds a phd in economics from curtin ... \n", + "0000-0003-2311-0600 NaN \n", + "\n", + " other_names \\\n", + "0000-0002-3505-2797 NaN \n", + "0000-0003-3670-9620 [retrodynamic, novelinflow] \n", + "0000-0002-5441-0465 [nuria h. león, nuria hernández león, hernánde... \n", + "0000-0002-7781-6767 [ndum (national defence university of malaysia)] \n", + "0000-0001-7010-2908 NaN \n", + "... ... \n", + "0000-0002-9446-9496 [jesús portillo fernández, portillo-fernández,... \n", + "0000-0003-0579-5829 [ángel carrión tavárez, á carrión tavárez, ác ... \n", + "0000-0001-8960-9004 [s. bastani, سوسن باستانی] \n", + "0000-0002-4379-6454 NaN \n", + "0000-0003-2311-0600 [dr myo kyaw hlaing] \n", + "\n", + " primary_email \\\n", + "0000-0002-3505-2797 NaN \n", + "0000-0003-3670-9620 NaN \n", + "0000-0002-5441-0465 NaN \n", + "0000-0002-7781-6767 NaN \n", + "0000-0001-7010-2908 NaN \n", + "... ... \n", + "0000-0002-9446-9496 NaN \n", + "0000-0003-0579-5829 NaN \n", + "0000-0001-8960-9004 sbastani@alzahra.ac.ir \n", + "0000-0002-4379-6454 NaN \n", + "0000-0003-2311-0600 NaN \n", + "\n", + " keywords \\\n", + "0000-0002-3505-2797 NaN \n", + "0000-0003-3670-9620 [gearturbine, mechanical, power, innovation, t... \n", + "0000-0002-5441-0465 [marketing, research, human resources, busines... \n", + "0000-0002-7781-6767 [network communication, manet, wsn, network se... \n", + "0000-0001-7010-2908 [ethnography, tourism and business, anglo-amer... \n", + "... ... \n", + "0000-0002-9446-9496 [absurdo, lingüística, pragmática, filosofía d... \n", + "0000-0003-0579-5829 [editing and publishing, sociomusicology, geog... \n", + "0000-0001-8960-9004 [social networks, online and offline communiti... \n", + "0000-0002-4379-6454 [applied economics, financial economics, appli... \n", + "0000-0003-2311-0600 [economic geology] \n", + "\n", + " external_ids \\\n", + "0000-0002-3505-2797 [[researcherid, q-3861-2017]] \n", + "0000-0003-3670-9620 [[loop profile, 394457]] \n", + "0000-0002-5441-0465 NaN \n", + "0000-0002-7781-6767 [[scopus author id, 24372977800], [researcheri... \n", + "0000-0001-7010-2908 [[ciência id, d418-d6f8-7d49]] \n", + "... ... \n", + "0000-0002-9446-9496 [[scopus author id, 55229372800]] \n", + "0000-0003-0579-5829 [[loop profile, 687295]] \n", + "0000-0001-8960-9004 [[scopus author id, 16642098400]] \n", + "0000-0002-4379-6454 NaN \n", + "0000-0003-2311-0600 NaN \n", + "\n", + " education \\\n", + "0000-0002-3505-2797 [[civil and transportation engineering , maste... \n", + "0000-0003-3670-9620 NaN \n", + "0000-0002-5441-0465 [[, course: social skills, university of salam... \n", + "0000-0002-7781-6767 NaN \n", + "0000-0001-7010-2908 [[ao abrigo da bolsa santander ie best practic... \n", + "... ... \n", + "0000-0002-9446-9496 [[, doctor en filología, universidad de sevill... \n", + "0000-0003-0579-5829 [[integration and economic and territorial dev... \n", + "0000-0001-8960-9004 [[sociology, ph.d., university of toronto, tor... \n", + "0000-0002-4379-6454 [[economics, doctor of philosophy , curtin uni... \n", + "0000-0003-2311-0600 NaN \n", + "\n", + " employment \\\n", + "0000-0002-3505-2797 [[senior lecturer, universitas syiah kuala, ba... \n", + "0000-0003-3670-9620 NaN \n", + "0000-0002-5441-0465 [[merchandise reception and expedition trainer... \n", + "0000-0002-7781-6767 [[lecturer, universiti pertahanan nasional mal... \n", + "0000-0001-7010-2908 [[presidente da comissão de acreditação do nov... \n", + "... ... \n", + "0000-0002-9446-9496 [[, grupo de investigación en lógica, lenguaje... \n", + "0000-0003-0579-5829 [[director, university of puerto rico at río p... \n", + "0000-0001-8960-9004 [[professor, alzahra university, tehran, vanak... \n", + "0000-0002-4379-6454 [[director, educational development, strathmor... \n", + "0000-0003-2311-0600 [[lecturer, union of myanmar ministry of educa... \n", + "\n", + " n_works works_source \\\n", + "0000-0002-3505-2797 6 [nurul malahayati] \n", + "0000-0003-3670-9620 1 [carlos barrera] \n", + "0000-0002-5441-0465 11 [nuria hernández-león] \n", + "0000-0002-7781-6767 35 [scopus - elsevier] \n", + "0000-0001-7010-2908 275 [clara sarmento] \n", + "... ... ... \n", + "0000-0002-9446-9496 35 [jesús portillo-fernández] \n", + "0000-0003-0579-5829 132 [ángel carrión-tavárez] \n", + "0000-0001-8960-9004 20 [scopus - elsevier] \n", + "0000-0002-4379-6454 4 [caroline wanjiru kariuki] \n", + "0000-0003-2311-0600 2 [myo kyaw hlaing] \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0002-3505-2797 2017-10-01t00:46:31.324z 2019-08-19t15:52:47.253z \n", + "0000-0003-3670-9620 2016-08-29t20:32:10.362z 2021-02-09t04:56:35.554z \n", + "0000-0002-5441-0465 2015-11-28t07:18:58.442z 2021-03-05t16:37:47.403z \n", + "0000-0002-7781-6767 2016-09-06t02:25:52.974z 2020-10-20t06:55:55.051z \n", + "0000-0001-7010-2908 2013-12-12t00:33:58.190z 2020-10-12t14:43:00.749z \n", + "... ... ... \n", + "0000-0002-9446-9496 2015-03-08t20:37:16.590z 2021-03-12t22:05:28.976z \n", + "0000-0003-0579-5829 2017-12-30t19:25:41.566z 2021-03-13t23:21:59.069z \n", + "0000-0001-8960-9004 2019-07-10t06:50:46.255z 2020-10-07t04:08:01.961z \n", + "0000-0002-4379-6454 2020-03-18t10:18:04.007z 2021-02-11t14:40:38.515z \n", + "0000-0003-2311-0600 2018-12-26t12:51:57.801z 2021-01-26t14:36:47.421z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0002-3505-2797 3 0 0 3 0 \n", + "0000-0003-3670-9620 0 0 0 0 0 \n", + "0000-0002-5441-0465 1 0 0 4 0 \n", + "0000-0002-7781-6767 24 0 0 35 0 \n", + "0000-0001-7010-2908 17 0 0 60 0 \n", + "... ... ... ... ... ... \n", + "0000-0002-9446-9496 0 0 0 0 0 \n", + "0000-0003-0579-5829 13 0 0 28 0 \n", + "0000-0001-8960-9004 19 0 0 33 0 \n", + "0000-0002-4379-6454 1 0 0 0 0 \n", + "0000-0003-2311-0600 1 0 0 2 0 \n", + "\n", + " primary_email_domain other_email_domains \\\n", + "0000-0002-3505-2797 NaN NaN \n", + "0000-0003-3670-9620 NaN NaN \n", + "0000-0002-5441-0465 NaN NaN \n", + "0000-0002-7781-6767 NaN NaN \n", + "0000-0001-7010-2908 NaN NaN \n", + "... ... ... \n", + "0000-0002-9446-9496 NaN NaN \n", + "0000-0003-0579-5829 NaN NaN \n", + "0000-0001-8960-9004 alzahra.ac.ir [gmail.com, gmail.com] \n", + "0000-0002-4379-6454 NaN NaN \n", + "0000-0003-2311-0600 NaN NaN \n", + "\n", + " url_domains \\\n", + "0000-0002-3505-2797 [google.com, ristekdikti.go.id, unsyiah.ac.id,... \n", + "0000-0003-3670-9620 [blogspot.mx, behance.net, authorstream.com, d... \n", + "0000-0002-5441-0465 [feriaempresamujer.com, escueladenegociosydire... \n", + "0000-0002-7781-6767 [google.com.my, researchgate.net, academia.edu... \n", + "0000-0001-7010-2908 [iscap.pt, google.pt, academia.edu, researchga... \n", + "... ... \n", + "0000-0002-9446-9496 [us.es, us.es, us.es, google.es, microsoft.com... \n", + "0000-0003-0579-5829 [academia.edu, redalyc.org, directorioexit.inf... \n", + "0000-0001-8960-9004 [scopus.com, google.com, publons.com, zenodo.o... \n", + "0000-0002-4379-6454 [scopus.com, mendeley.com, publons.com, resear... \n", + "0000-0003-2311-0600 [facebook.com, linkedin.com, instagram.com, re... \n", + "\n", + " n_emails n_urls n_ids n_keywords n_education \\\n", + "0000-0002-3505-2797 NaN 16.0 1.0 NaN 2.0 \n", + "0000-0003-3670-9620 NaN 24.0 1.0 8.0 NaN \n", + "0000-0002-5441-0465 NaN 16.0 NaN 7.0 19.0 \n", + "0000-0002-7781-6767 NaN 16.0 2.0 10.0 NaN \n", + "0000-0001-7010-2908 NaN 13.0 1.0 6.0 8.0 \n", + "... ... ... ... ... ... \n", + "0000-0002-9446-9496 NaN 12.0 1.0 5.0 5.0 \n", + "0000-0003-0579-5829 NaN 11.0 1.0 6.0 4.0 \n", + "0000-0001-8960-9004 2.0 11.0 1.0 4.0 3.0 \n", + "0000-0002-4379-6454 NaN 13.0 NaN 4.0 3.0 \n", + "0000-0003-2311-0600 NaN 12.0 NaN 1.0 NaN \n", + "\n", + " n_employment \n", + "0000-0002-3505-2797 1.0 \n", + "0000-0003-3670-9620 NaN \n", + "0000-0002-5441-0465 16.0 \n", + "0000-0002-7781-6767 4.0 \n", + "0000-0001-7010-2908 37.0 \n", + "... ... \n", + "0000-0002-9446-9496 1.0 \n", + "0000-0003-0579-5829 3.0 \n", + "0000-0001-8960-9004 4.0 \n", + "0000-0002-4379-6454 6.0 \n", + "0000-0003-2311-0600 2.0 \n", + "\n", + "[141 rows x 30 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0111nurulmalahayatigoogle scholarNaNNaNNaN[[researcherid, q-3861-2017]][[civil and transportation engineering , maste...[[senior lecturer, universitas syiah kuala, ba...6nurul malahayati2017-10-01t00:46:31.324z2019-08-19t15:52:47.253z30030NaNNaN[google.com, ristekdikti.go.id, unsyiah.ac.id,...NaN16.01.0NaN2.01.0
1111carlosbarreraim individual inventor, and this is my work; s...[retrodynamic, novelinflow]NaN[gearturbine, mechanical, power, innovation, t...[[loop profile, 394457]]NaNNaN1carlos barrera2016-08-29t20:32:10.362z2021-02-09t04:56:35.554z00000NaNNaN[blogspot.mx, behance.net, authorstream.com, d...NaN24.01.08.0NaNNaN
2111nuriahernández-leónNaN[nuria h. león, nuria hernández león, hernánde...NaN[marketing, research, human resources, busines...NaN[[, course: social skills, university of salam...[[merchandise reception and expedition trainer...11nuria hernández-león2015-11-28t07:18:58.442z2021-03-05t16:37:47.403z10040NaNNaN[feriaempresamujer.com, escueladenegociosydire...NaN16.0NaN7.019.016.0
3111mohd nazriismailborn in penang, malaysia in 1971, dr. mohd had...[ndum (national defence university of malaysia)]NaN[network communication, manet, wsn, network se...[[scopus author id, 24372977800], [researcheri...NaN[[lecturer, universiti pertahanan nasional mal...35scopus - elsevier2016-09-06t02:25:52.974z2020-10-20t06:55:55.051z2400350NaNNaN[google.com.my, researchgate.net, academia.edu...NaN16.02.010.0NaN4.0
4111clarasarmentoclara sarmento holds an aggregation in cultura...NaNNaN[ethnography, tourism and business, anglo-amer...[[ciência id, d418-d6f8-7d49]][[ao abrigo da bolsa santander ie best practic...[[presidente da comissão de acreditação do nov...275clara sarmento2013-12-12t00:33:58.190z2020-10-12t14:43:00.749z1700600NaNNaN[iscap.pt, google.pt, academia.edu, researchga...NaN13.01.06.08.037.0
.............................................................................................
136111jesúsportillo-fernándezba in philosophy, ba in humanities, ph.d. in p...[jesús portillo fernández, portillo-fernández,...NaN[absurdo, lingüística, pragmática, filosofía d...[[scopus author id, 55229372800]][[, doctor en filología, universidad de sevill...[[, grupo de investigación en lógica, lenguaje...35jesús portillo-fernández2015-03-08t20:37:16.590z2021-03-12t22:05:28.976z00000NaNNaN[us.es, us.es, us.es, google.es, microsoft.com...NaN12.01.05.05.01.0
137111ángelcarrión-tavárezNaN[ángel carrión tavárez, á carrión tavárez, ác ...NaN[editing and publishing, sociomusicology, geog...[[loop profile, 687295]][[integration and economic and territorial dev...[[director, university of puerto rico at río p...132ángel carrión-tavárez2017-12-30t19:25:41.566z2021-03-13t23:21:59.069z1300280NaNNaN[academia.edu, redalyc.org, directorioexit.inf...NaN11.01.06.04.03.0
138111susanbastaniNaN[s. bastani, سوسن باستانی]sbastani@alzahra.ac.ir[social networks, online and offline communiti...[[scopus author id, 16642098400]][[sociology, ph.d., university of toronto, tor...[[professor, alzahra university, tehran, vanak...20scopus - elsevier2019-07-10t06:50:46.255z2020-10-07t04:08:01.961z1900330alzahra.ac.ir[gmail.com, gmail.com][scopus.com, google.com, publons.com, zenodo.o...2.011.01.04.03.04.0
139111caroline wanjirukariukicaroline holds a phd in economics from curtin ...NaNNaN[applied economics, financial economics, appli...NaN[[economics, doctor of philosophy , curtin uni...[[director, educational development, strathmor...4caroline wanjiru kariuki2020-03-18t10:18:04.007z2021-02-11t14:40:38.515z10000NaNNaN[scopus.com, mendeley.com, publons.com, resear...NaN13.0NaN4.03.06.0
140111myokyaw hlaingNaN[dr myo kyaw hlaing]NaN[economic geology]NaNNaN[[lecturer, union of myanmar ministry of educa...2myo kyaw hlaing2018-12-26t12:51:57.801z2021-01-26t14:36:47.421z10020NaNNaN[facebook.com, linkedin.com, instagram.com, re...NaN12.0NaN1.0NaN2.0
\n", + "

141 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email given_names \\\n", + "0 1 1 1 nurul \n", + "1 1 1 1 carlos \n", + "2 1 1 1 nuria \n", + "3 1 1 1 mohd nazri \n", + "4 1 1 1 clara \n", + ".. ... ... ... ... \n", + "136 1 1 1 jesús \n", + "137 1 1 1 ángel \n", + "138 1 1 1 susan \n", + "139 1 1 1 caroline wanjiru \n", + "140 1 1 1 myo \n", + "\n", + " family_name biography \\\n", + "0 malahayati google scholar \n", + "1 barrera im individual inventor, and this is my work; s... \n", + "2 hernández-león NaN \n", + "3 ismail born in penang, malaysia in 1971, dr. mohd had... \n", + "4 sarmento clara sarmento holds an aggregation in cultura... \n", + ".. ... ... \n", + "136 portillo-fernández ba in philosophy, ba in humanities, ph.d. in p... \n", + "137 carrión-tavárez NaN \n", + "138 bastani NaN \n", + "139 kariuki caroline holds a phd in economics from curtin ... \n", + "140 kyaw hlaing NaN \n", + "\n", + " other_names \\\n", + "0 NaN \n", + "1 [retrodynamic, novelinflow] \n", + "2 [nuria h. león, nuria hernández león, hernánde... \n", + "3 [ndum (national defence university of malaysia)] \n", + "4 NaN \n", + ".. ... \n", + "136 [jesús portillo fernández, portillo-fernández,... \n", + "137 [ángel carrión tavárez, á carrión tavárez, ác ... \n", + "138 [s. bastani, سوسن باستانی] \n", + "139 NaN \n", + "140 [dr myo kyaw hlaing] \n", + "\n", + " primary_email \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + ".. ... \n", + "136 NaN \n", + "137 NaN \n", + "138 sbastani@alzahra.ac.ir \n", + "139 NaN \n", + "140 NaN \n", + "\n", + " keywords \\\n", + "0 NaN \n", + "1 [gearturbine, mechanical, power, innovation, t... \n", + "2 [marketing, research, human resources, busines... \n", + "3 [network communication, manet, wsn, network se... \n", + "4 [ethnography, tourism and business, anglo-amer... \n", + ".. ... \n", + "136 [absurdo, lingüística, pragmática, filosofía d... \n", + "137 [editing and publishing, sociomusicology, geog... \n", + "138 [social networks, online and offline communiti... \n", + "139 [applied economics, financial economics, appli... \n", + "140 [economic geology] \n", + "\n", + " external_ids \\\n", + "0 [[researcherid, q-3861-2017]] \n", + "1 [[loop profile, 394457]] \n", + "2 NaN \n", + "3 [[scopus author id, 24372977800], [researcheri... \n", + "4 [[ciência id, d418-d6f8-7d49]] \n", + ".. ... \n", + "136 [[scopus author id, 55229372800]] \n", + "137 [[loop profile, 687295]] \n", + "138 [[scopus author id, 16642098400]] \n", + "139 NaN \n", + "140 NaN \n", + "\n", + " education \\\n", + "0 [[civil and transportation engineering , maste... \n", + "1 NaN \n", + "2 [[, course: social skills, university of salam... \n", + "3 NaN \n", + "4 [[ao abrigo da bolsa santander ie best practic... \n", + ".. ... \n", + "136 [[, doctor en filología, universidad de sevill... \n", + "137 [[integration and economic and territorial dev... \n", + "138 [[sociology, ph.d., university of toronto, tor... \n", + "139 [[economics, doctor of philosophy , curtin uni... \n", + "140 NaN \n", + "\n", + " employment n_works \\\n", + "0 [[senior lecturer, universitas syiah kuala, ba... 6 \n", + "1 NaN 1 \n", + "2 [[merchandise reception and expedition trainer... 11 \n", + "3 [[lecturer, universiti pertahanan nasional mal... 35 \n", + "4 [[presidente da comissão de acreditação do nov... 275 \n", + ".. ... ... \n", + "136 [[, grupo de investigación en lógica, lenguaje... 35 \n", + "137 [[director, university of puerto rico at río p... 132 \n", + "138 [[professor, alzahra university, tehran, vanak... 20 \n", + "139 [[director, educational development, strathmor... 4 \n", + "140 [[lecturer, union of myanmar ministry of educa... 2 \n", + "\n", + " works_source activation_date \\\n", + "0 nurul malahayati 2017-10-01t00:46:31.324z \n", + "1 carlos barrera 2016-08-29t20:32:10.362z \n", + "2 nuria hernández-león 2015-11-28t07:18:58.442z \n", + "3 scopus - elsevier 2016-09-06t02:25:52.974z \n", + "4 clara sarmento 2013-12-12t00:33:58.190z \n", + ".. ... ... \n", + "136 jesús portillo-fernández 2015-03-08t20:37:16.590z \n", + "137 ángel carrión-tavárez 2017-12-30t19:25:41.566z \n", + "138 scopus - elsevier 2019-07-10t06:50:46.255z \n", + "139 caroline wanjiru kariuki 2020-03-18t10:18:04.007z \n", + "140 myo kyaw hlaing 2018-12-26t12:51:57.801z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0 2019-08-19t15:52:47.253z 3 0 0 3 0 \n", + "1 2021-02-09t04:56:35.554z 0 0 0 0 0 \n", + "2 2021-03-05t16:37:47.403z 1 0 0 4 0 \n", + "3 2020-10-20t06:55:55.051z 24 0 0 35 0 \n", + "4 2020-10-12t14:43:00.749z 17 0 0 60 0 \n", + ".. ... ... ... ... ... ... \n", + "136 2021-03-12t22:05:28.976z 0 0 0 0 0 \n", + "137 2021-03-13t23:21:59.069z 13 0 0 28 0 \n", + "138 2020-10-07t04:08:01.961z 19 0 0 33 0 \n", + "139 2021-02-11t14:40:38.515z 1 0 0 0 0 \n", + "140 2021-01-26t14:36:47.421z 1 0 0 2 0 \n", + "\n", + " primary_email_domain other_email_domains \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + ".. ... ... \n", + "136 NaN NaN \n", + "137 NaN NaN \n", + "138 alzahra.ac.ir [gmail.com, gmail.com] \n", + "139 NaN NaN \n", + "140 NaN NaN \n", + "\n", + " url_domains n_emails n_urls \\\n", + "0 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n", + "1 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n", + "2 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n", + "3 [google.com.my, researchgate.net, academia.edu... NaN 16.0 \n", + "4 [iscap.pt, google.pt, academia.edu, researchga... NaN 13.0 \n", + ".. ... ... ... \n", + "136 [us.es, us.es, us.es, google.es, microsoft.com... NaN 12.0 \n", + "137 [academia.edu, redalyc.org, directorioexit.inf... NaN 11.0 \n", + "138 [scopus.com, google.com, publons.com, zenodo.o... 2.0 11.0 \n", + "139 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n", + "140 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n", + "\n", + " n_ids n_keywords n_education n_employment \n", + "0 1.0 NaN 2.0 1.0 \n", + "1 1.0 8.0 NaN NaN \n", + "2 NaN 7.0 19.0 16.0 \n", + "3 2.0 10.0 NaN 4.0 \n", + "4 1.0 6.0 8.0 37.0 \n", + ".. ... ... ... ... \n", + "136 1.0 5.0 5.0 1.0 \n", + "137 1.0 6.0 4.0 3.0 \n", + "138 1.0 4.0 3.0 4.0 \n", + "139 NaN 4.0 3.0 6.0 \n", + "140 NaN 1.0 NaN 2.0 \n", + "\n", + "[141 rows x 30 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "exploded_sources = df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)].explode('works_source').reset_index(drop=True)\n", "exploded_sources" @@ -1046,11 +9918,598 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0111nurulmalahayatigoogle scholarNaNNaNNaN[[researcherid, q-3861-2017]][[civil and transportation engineering , maste...[[senior lecturer, universitas syiah kuala, ba...6nurul malahayati2017-10-01t00:46:31.324z2019-08-19t15:52:47.253z30030NaNNaN[google.com, ristekdikti.go.id, unsyiah.ac.id,...NaN16.01.0NaN2.01.0
1111carlosbarreraim individual inventor, and this is my work; s...[retrodynamic, novelinflow]NaN[gearturbine, mechanical, power, innovation, t...[[loop profile, 394457]]NaNNaN1carlos barrera2016-08-29t20:32:10.362z2021-02-09t04:56:35.554z00000NaNNaN[blogspot.mx, behance.net, authorstream.com, d...NaN24.01.08.0NaNNaN
2111nuriahernández-leónNaN[nuria h. león, nuria hernández león, hernánde...NaN[marketing, research, human resources, busines...NaN[[, course: social skills, university of salam...[[merchandise reception and expedition trainer...11nuria hernández-león2015-11-28t07:18:58.442z2021-03-05t16:37:47.403z10040NaNNaN[feriaempresamujer.com, escueladenegociosydire...NaN16.0NaN7.019.016.0
4111clarasarmentoclara sarmento holds an aggregation in cultura...NaNNaN[ethnography, tourism and business, anglo-amer...[[ciência id, d418-d6f8-7d49]][[ao abrigo da bolsa santander ie best practic...[[presidente da comissão de acreditação do nov...275clara sarmento2013-12-12t00:33:58.190z2020-10-12t14:43:00.749z1700600NaNNaN[iscap.pt, google.pt, academia.edu, researchga...NaN13.01.06.08.037.0
5111micheledantinimichele dantini (ph. d) is professor of histor...NaNNaN[contemporary art, art theory, postcolonial, h...NaNNaN[[, università per stranieri di perugia, perug...6michele dantini2014-02-22t17:01:43.444z2019-11-25t20:21:04.714z00000NaNNaN[unipmn.it, huffingtonpost.it, roars.it, doppi...NaN15.0NaN6.0NaN1.0
.............................................................................................
134111robertoharasystematics, evolutionary biology, and the his...[r. o’hara, r.j. o’hara, robert o’hara, robert...NaN[evolutionary biology, new england genealogy, ...[[isni, 0000000138200102], [researcherid, b-47...[[biology, ph.d., harvard university, cambridg...NaN45robert j. o’hara2014-09-21t02:45:19.620z2020-07-09t06:51:09.228z2300720NaNNaN[rjohara.net, google.com, collegiateway.org, r...NaN12.03.05.01.0NaN
136111jesúsportillo-fernándezba in philosophy, ba in humanities, ph.d. in p...[jesús portillo fernández, portillo-fernández,...NaN[absurdo, lingüística, pragmática, filosofía d...[[scopus author id, 55229372800]][[, doctor en filología, universidad de sevill...[[, grupo de investigación en lógica, lenguaje...35jesús portillo-fernández2015-03-08t20:37:16.590z2021-03-12t22:05:28.976z00000NaNNaN[us.es, us.es, us.es, google.es, microsoft.com...NaN12.01.05.05.01.0
137111ángelcarrión-tavárezNaN[ángel carrión tavárez, á carrión tavárez, ác ...NaN[editing and publishing, sociomusicology, geog...[[loop profile, 687295]][[integration and economic and territorial dev...[[director, university of puerto rico at río p...132ángel carrión-tavárez2017-12-30t19:25:41.566z2021-03-13t23:21:59.069z1300280NaNNaN[academia.edu, redalyc.org, directorioexit.inf...NaN11.01.06.04.03.0
139111caroline wanjirukariukicaroline holds a phd in economics from curtin ...NaNNaN[applied economics, financial economics, appli...NaN[[economics, doctor of philosophy , curtin uni...[[director, educational development, strathmor...4caroline wanjiru kariuki2020-03-18t10:18:04.007z2021-02-11t14:40:38.515z10000NaNNaN[scopus.com, mendeley.com, publons.com, resear...NaN13.0NaN4.03.06.0
140111myokyaw hlaingNaN[dr myo kyaw hlaing]NaN[economic geology]NaNNaN[[lecturer, union of myanmar ministry of educa...2myo kyaw hlaing2018-12-26t12:51:57.801z2021-01-26t14:36:47.421z10020NaNNaN[facebook.com, linkedin.com, instagram.com, re...NaN12.0NaN1.0NaN2.0
\n", + "

115 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email given_names \\\n", + "0 1 1 1 nurul \n", + "1 1 1 1 carlos \n", + "2 1 1 1 nuria \n", + "4 1 1 1 clara \n", + "5 1 1 1 michele \n", + ".. ... ... ... ... \n", + "134 1 1 1 robert \n", + "136 1 1 1 jesús \n", + "137 1 1 1 ángel \n", + "139 1 1 1 caroline wanjiru \n", + "140 1 1 1 myo \n", + "\n", + " family_name biography \\\n", + "0 malahayati google scholar \n", + "1 barrera im individual inventor, and this is my work; s... \n", + "2 hernández-león NaN \n", + "4 sarmento clara sarmento holds an aggregation in cultura... \n", + "5 dantini michele dantini (ph. d) is professor of histor... \n", + ".. ... ... \n", + "134 ohara systematics, evolutionary biology, and the his... \n", + "136 portillo-fernández ba in philosophy, ba in humanities, ph.d. in p... \n", + "137 carrión-tavárez NaN \n", + "139 kariuki caroline holds a phd in economics from curtin ... \n", + "140 kyaw hlaing NaN \n", + "\n", + " other_names primary_email \\\n", + "0 NaN NaN \n", + "1 [retrodynamic, novelinflow] NaN \n", + "2 [nuria h. león, nuria hernández león, hernánde... NaN \n", + "4 NaN NaN \n", + "5 NaN NaN \n", + ".. ... ... \n", + "134 [r. o’hara, r.j. o’hara, robert o’hara, robert... NaN \n", + "136 [jesús portillo fernández, portillo-fernández,... NaN \n", + "137 [ángel carrión tavárez, á carrión tavárez, ác ... NaN \n", + "139 NaN NaN \n", + "140 [dr myo kyaw hlaing] NaN \n", + "\n", + " keywords \\\n", + "0 NaN \n", + "1 [gearturbine, mechanical, power, innovation, t... \n", + "2 [marketing, research, human resources, busines... \n", + "4 [ethnography, tourism and business, anglo-amer... \n", + "5 [contemporary art, art theory, postcolonial, h... \n", + ".. ... \n", + "134 [evolutionary biology, new england genealogy, ... \n", + "136 [absurdo, lingüística, pragmática, filosofía d... \n", + "137 [editing and publishing, sociomusicology, geog... \n", + "139 [applied economics, financial economics, appli... \n", + "140 [economic geology] \n", + "\n", + " external_ids \\\n", + "0 [[researcherid, q-3861-2017]] \n", + "1 [[loop profile, 394457]] \n", + "2 NaN \n", + "4 [[ciência id, d418-d6f8-7d49]] \n", + "5 NaN \n", + ".. ... \n", + "134 [[isni, 0000000138200102], [researcherid, b-47... \n", + "136 [[scopus author id, 55229372800]] \n", + "137 [[loop profile, 687295]] \n", + "139 NaN \n", + "140 NaN \n", + "\n", + " education \\\n", + "0 [[civil and transportation engineering , maste... \n", + "1 NaN \n", + "2 [[, course: social skills, university of salam... \n", + "4 [[ao abrigo da bolsa santander ie best practic... \n", + "5 NaN \n", + ".. ... \n", + "134 [[biology, ph.d., harvard university, cambridg... \n", + "136 [[, doctor en filología, universidad de sevill... \n", + "137 [[integration and economic and territorial dev... \n", + "139 [[economics, doctor of philosophy , curtin uni... \n", + "140 NaN \n", + "\n", + " employment n_works \\\n", + "0 [[senior lecturer, universitas syiah kuala, ba... 6 \n", + "1 NaN 1 \n", + "2 [[merchandise reception and expedition trainer... 11 \n", + "4 [[presidente da comissão de acreditação do nov... 275 \n", + "5 [[, università per stranieri di perugia, perug... 6 \n", + ".. ... ... \n", + "134 NaN 45 \n", + "136 [[, grupo de investigación en lógica, lenguaje... 35 \n", + "137 [[director, university of puerto rico at río p... 132 \n", + "139 [[director, educational development, strathmor... 4 \n", + "140 [[lecturer, union of myanmar ministry of educa... 2 \n", + "\n", + " works_source activation_date \\\n", + "0 nurul malahayati 2017-10-01t00:46:31.324z \n", + "1 carlos barrera 2016-08-29t20:32:10.362z \n", + "2 nuria hernández-león 2015-11-28t07:18:58.442z \n", + "4 clara sarmento 2013-12-12t00:33:58.190z \n", + "5 michele dantini 2014-02-22t17:01:43.444z \n", + ".. ... ... \n", + "134 robert j. o’hara 2014-09-21t02:45:19.620z \n", + "136 jesús portillo-fernández 2015-03-08t20:37:16.590z \n", + "137 ángel carrión-tavárez 2017-12-30t19:25:41.566z \n", + "139 caroline wanjiru kariuki 2020-03-18t10:18:04.007z \n", + "140 myo kyaw hlaing 2018-12-26t12:51:57.801z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0 2019-08-19t15:52:47.253z 3 0 0 3 0 \n", + "1 2021-02-09t04:56:35.554z 0 0 0 0 0 \n", + "2 2021-03-05t16:37:47.403z 1 0 0 4 0 \n", + "4 2020-10-12t14:43:00.749z 17 0 0 60 0 \n", + "5 2019-11-25t20:21:04.714z 0 0 0 0 0 \n", + ".. ... ... ... ... ... ... \n", + "134 2020-07-09t06:51:09.228z 23 0 0 72 0 \n", + "136 2021-03-12t22:05:28.976z 0 0 0 0 0 \n", + "137 2021-03-13t23:21:59.069z 13 0 0 28 0 \n", + "139 2021-02-11t14:40:38.515z 1 0 0 0 0 \n", + "140 2021-01-26t14:36:47.421z 1 0 0 2 0 \n", + "\n", + " primary_email_domain other_email_domains \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "4 NaN NaN \n", + "5 NaN NaN \n", + ".. ... ... \n", + "134 NaN NaN \n", + "136 NaN NaN \n", + "137 NaN NaN \n", + "139 NaN NaN \n", + "140 NaN NaN \n", + "\n", + " url_domains n_emails n_urls \\\n", + "0 [google.com, ristekdikti.go.id, unsyiah.ac.id,... NaN 16.0 \n", + "1 [blogspot.mx, behance.net, authorstream.com, d... NaN 24.0 \n", + "2 [feriaempresamujer.com, escueladenegociosydire... NaN 16.0 \n", + "4 [iscap.pt, google.pt, academia.edu, researchga... NaN 13.0 \n", + "5 [unipmn.it, huffingtonpost.it, roars.it, doppi... NaN 15.0 \n", + ".. ... ... ... \n", + "134 [rjohara.net, google.com, collegiateway.org, r... NaN 12.0 \n", + "136 [us.es, us.es, us.es, google.es, microsoft.com... NaN 12.0 \n", + "137 [academia.edu, redalyc.org, directorioexit.inf... NaN 11.0 \n", + "139 [scopus.com, mendeley.com, publons.com, resear... NaN 13.0 \n", + "140 [facebook.com, linkedin.com, instagram.com, re... NaN 12.0 \n", + "\n", + " n_ids n_keywords n_education n_employment \n", + "0 1.0 NaN 2.0 1.0 \n", + "1 1.0 8.0 NaN NaN \n", + "2 NaN 7.0 19.0 16.0 \n", + "4 1.0 6.0 8.0 37.0 \n", + "5 NaN 6.0 NaN 1.0 \n", + ".. ... ... ... ... \n", + "134 3.0 5.0 1.0 NaN \n", + "136 1.0 5.0 5.0 1.0 \n", + "137 1.0 6.0 4.0 3.0 \n", + "139 NaN 4.0 3.0 6.0 \n", + "140 NaN 1.0 NaN 2.0 \n", + "\n", + "[115 rows x 30 columns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "exploded_sources[exploded_sources.apply(lambda x: x['works_source'].find(x['given_names']) >= 0, axis=1)]" ] @@ -1085,34 +10544,178 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "metadata": {}, - "outputs": [], - "source": [ - "df['n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 1.301959e+06\n", + "mean 1.358640e+00\n", + "std 6.635087e-01\n", + "min 1.000000e+00\n", + "25% 1.000000e+00\n", + "50% 1.000000e+00\n", + "75% 2.000000e+00\n", + "max 8.000000e+01\n", + "Name: n_ids, dtype: float64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df.n_ids.describe()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-9554-6633111john awilliamsNaNNaNNaNNaN[[scopus author id,  55553733518], [scopus aut...NaN[[, aston university, birmingham, , gb, 1722, ...92[aston research explorer]2014-11-20t09:42:10.690z2021-03-12t01:00:39.996z80002080NaNNaN[aston.ac.uk]NaN1.080.0NaNNaN1.0
\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-9554-6633 1 1 1 \n", + "\n", + " given_names family_name biography other_names \\\n", + "0000-0002-9554-6633 john a williams NaN NaN \n", + "\n", + " primary_email keywords \\\n", + "0000-0002-9554-6633 NaN NaN \n", + "\n", + " external_ids \\\n", + "0000-0002-9554-6633 [[scopus author id,  55553733518], [scopus aut... \n", + "\n", + " education \\\n", + "0000-0002-9554-6633 NaN \n", + "\n", + " employment \\\n", + "0000-0002-9554-6633 [[, aston university, birmingham, , gb, 1722, ... \n", + "\n", + " n_works works_source \\\n", + "0000-0002-9554-6633 92 [aston research explorer] \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0002-9554-6633 2014-11-20t09:42:10.690z 2021-03-12t01:00:39.996z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0002-9554-6633 80 0 0 208 0 \n", + "\n", + " primary_email_domain other_email_domains url_domains \\\n", + "0000-0002-9554-6633 NaN NaN [aston.ac.uk] \n", + "\n", + " n_emails n_urls n_ids n_keywords n_education \\\n", + "0000-0002-9554-6633 NaN 1.0 80.0 NaN NaN \n", + "\n", + " n_employment \n", + "0000-0002-9554-6633 1.0 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "df[df.n_ids == df.n_ids.max()]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ @@ -1121,7 +10724,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -1130,16 +10733,91 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidexternal_idsprovider
91[researcherid, k-4630-2014]researcherid
291[scopus author id, 54394231000]scopus author id
471[researcherid, p-2223-2018]researcherid
511[scopus author id, 57189297461]scopus author id
651[scopus author id, 8399842800]scopus author id
\n", + "
" + ], + "text/plain": [ + " orcid external_ids provider\n", + "9 1 [researcherid, k-4630-2014] researcherid\n", + "29 1 [scopus author id, 54394231000] scopus author id\n", + "47 1 [researcherid, p-2223-2018] researcherid\n", + "51 1 [scopus author id, 57189297461] scopus author id\n", + "65 1 [scopus author id, 8399842800] scopus author id" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ids[ids.provider.notna()].head()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -1148,9 +10826,970 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 41, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "scopus author id", + "researcherid", + "loop profile", + "ciência id", + "researcher name resolver id", + "中国科学家在线", + "sciprofile", + "isni", + "gnd", + "pitt id", + "technical university of denmark cwis", + "researcher id", + "id dialnet", + "digital author id", + "scopus author id: ", + "authenticusid", + "hku researcherpage", + "uow scholars", + "cti vitae", + "scopus author id:", + "hkust profile", + "chalmers id", + "scopus id", + "iauthor", + "google scholar", + "digital author id (dai)", + "authid", + "dai", + "us epa vivo", + "scopus id", + "authenticus", + "smithsonian profiles", + "github", + "escientist", + "vivo cornell", + "researcherid:", + "id dialnet:", + "dialnet id", + "sciprofiles", + "kaken", + "une researcher id", + "researcherid: ", + "orcid", + "scienceopen", + "profile system identifier", + "orcid id", + "custom" + ], + "y": [ + 1030807, + 544825, + 117325, + 36666, + 7907, + 4804, + 4411, + 3075, + 2954, + 2674, + 2483, + 1445, + 1168, + 1124, + 1077, + 869, + 741, + 646, + 581, + 548, + 522, + 430, + 254, + 212, + 200, + 177, + 175, + 155, + 146, + 127, + 83, + 61, + 51, + 49, + 46, + 39, + 7, + 6, + 5, + 5, + 4, + 3, + 2, + 1, + 1, + 1, + 1 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "IDs provided by providers" + }, + "xaxis": { + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "data = [\n", " go.Bar(\n", @@ -1169,9 +11808,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 42, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([nan, 'researcherid', 'scopus author id', 'loop profile', 'gnd',\n", + " 'ciência id', 'researcher name resolver id', 'pitt id',\n", + " 'id dialnet', 'isni', 'technical university of denmark cwis',\n", + " 'chalmers id', 'scopus author id: ', 'scopus author id:',\n", + " 'hkust profile', 'hku researcherpage', '中国科学家在线', 'uow scholars',\n", + " 'digital author id', 'sciprofile', 'digital author id (dai)',\n", + " 'cti vitae', 'researcher id', 'authid', 'authenticusid',\n", + " 'vivo cornell', 'us epa vivo', 'escientist',\n", + " 'smithsonian profiles', 'authenticus', 'github', 'iauthor',\n", + " 'orcid id', 'dai', 'scopus id', 'scopus id', 'google scholar',\n", + " 'researcherid:', 'kaken', 'dialnet id', 'researcherid: ',\n", + " 'une researcher id', 'sciprofiles', 'id dialnet:', 'scienceopen',\n", + " 'orcid', 'profile system identifier', 'custom'], dtype=object)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.unique(ids['provider'])" ] @@ -1192,94 +11854,1198 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 43, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidn_keywords
0000-0002-0673-03411154.0
0000-0003-3343-56601148.0
0000-0002-7060-41121140.0
0000-0002-6075-35011140.0
0000-0001-5287-19491132.0
.........
0000-0002-1686-19351NaN
0000-0002-3800-63311NaN
0000-0002-8783-58141NaN
0000-0002-7584-22831NaN
0000-0003-0529-35381NaN
\n", + "

10916574 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " orcid n_keywords\n", + "0000-0002-0673-0341 1 154.0\n", + "0000-0003-3343-5660 1 148.0\n", + "0000-0002-7060-4112 1 140.0\n", + "0000-0002-6075-3501 1 140.0\n", + "0000-0001-5287-1949 1 132.0\n", + "... ... ...\n", + "0000-0002-1686-1935 1 NaN\n", + "0000-0002-3800-6331 1 NaN\n", + "0000-0002-8783-5814 1 NaN\n", + "0000-0002-7584-2283 1 NaN\n", + "0000-0003-0529-3538 1 NaN\n", + "\n", + "[10916574 rows x 2 columns]" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df[df['orcid'] == AM]['keywords'].values[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "I did a good job. The following instead is dirty" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df[df['orcid'] == PP]['keywords'].values[0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "So the keyword field needs some cleaning" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def fix_keywords(lst):\n", - " fixed = set()\n", - " for k in lst:\n", - " tokens = set(k.split(','))\n", - " for t in tokens:\n", - " fixed.add(str.strip(t))\n", - " fixed.discard('')\n", - " return list(fixed)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df['fixed_keywords'] = df[df.keywords.notna()]['keywords'].apply(lambda x: fix_keywords(x))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df[df['orcid'] == PP]['fixed_keywords'].values[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df['n_keywords'] = df.keywords.str.len()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "keywords_by_orcid = df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]\n", + "keywords_by_orcid = df[['orcid', 'n_keywords']].sort_values('n_keywords', ascending=False)\n", "keywords_by_orcid" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 44, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 + ], + "y": [ + 154, + 148, + 140, + 140, + 132, + 124, + 115, + 106, + 105, + 102, + 100, + 94, + 92, + 92, + 88, + 86, + 78, + 77, + 75, + 75, + 72, + 71, + 70, + 68, + 68, + 68, + 67, + 66, + 64, + 64, + 63, + 62, + 61, + 61, + 61, + 60, + 60, + 56, + 55, + 54, + 53, + 53, + 53, + 53, + 53, + 52, + 52, + 52, + 51, + 51, + 51, + 50, + 50, + 50, + 50, + 50, + 49, + 49, + 49, + 49, + 48, + 48, + 48, + 48, + 48, + 47, + 47, + 47, + 47, + 46, + 46, + 46, + 45, + 45, + 45, + 45, + 44, + 44, + 44, + 44, + 44, + 44, + 44, + 44, + 44, + 43, + 43, + 43, + 43, + 43, + 43, + 43, + 43, + 42, + 42, + 42, + 42, + 42, + 42, + 41 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Keywords provided by ORCiD" + }, + "xaxis": { + "range": [ + -0.5, + 99.5 + ], + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(100)\n", "data = [\n", @@ -1299,7 +13065,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "metadata": {}, "outputs": [], "source": [ @@ -1313,9 +13079,976 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "machine learning", + "bioinformatics", + "education", + "molecular biology", + "cancer", + "ecology", + "artificial intelligence", + "epidemiology", + "public health", + "microbiology", + "neuroscience", + "immunology", + "genetics", + "climate change", + "remote sensing", + "biochemistry", + "genomics", + "biotechnology", + "nanotechnology", + "sustainability", + "gis", + "educación", + "deep learning", + "psychology", + "computer vision", + "marketing", + "nutrition", + "innovation", + "data science", + "statistics", + "data mining", + "image processing", + "nanomaterials", + "robotics", + "management", + "optimization", + "chemistry", + "renewable energy", + "gender", + "diabetes", + "biomaterials", + "educação", + "architecture", + "catalysis", + "history", + "electrochemistry", + "evolution", + "research", + "energy", + "biodiversity" + ], + "y": [ + 8508, + 5399, + 5169, + 4536, + 4150, + 3906, + 3808, + 3774, + 3666, + 3525, + 3483, + 3455, + 3329, + 3328, + 3261, + 2977, + 2788, + 2670, + 2661, + 2643, + 2499, + 2495, + 2442, + 2368, + 2289, + 2199, + 2185, + 2143, + 2139, + 2138, + 2104, + 2093, + 2089, + 2080, + 2067, + 2064, + 2003, + 2000, + 1990, + 1989, + 1989, + 1847, + 1823, + 1809, + 1809, + 1797, + 1795, + 1776, + 1762, + 1712 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top-50 keywords occurrence" + }, + "xaxis": { + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "set_top_n(50)\n", "data = [\n", @@ -1337,7 +14070,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Correlation" + "## Education" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_education(lst):\n", + " educations = []\n", + " for e in lst:\n", + " # e[0] degree\n", + " # e[1] role\n", + " # e[2] university\n", + " # e[..] city, region, country, id, id_scheme\n", + " educations.append(' '.join([e[0], e[1], e[2]]))\n", + " return educations" ] }, { @@ -1345,8 +14095,2243 @@ "execution_count": null, "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, "source": [ - "fig = px.imshow(df.fillna(0).corr())\n", + "## Employment" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "def extract_employment(lst):\n", + " res = []\n", + " for e in lst:\n", + " # e[0] role\n", + " # e[1] institute\n", + " # e[..] city, region, country, id, id_scheme\n", + " res.append(' '.join([e[0], e[1]]))\n", + " return res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Biography" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "df['biography'] = df[df.biography.notna()]['biography'].replace('', np.NaN)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 348649\n", + "unique 332523\n", + "top car title loans are a more straightforward way...\n", + "freq 343\n", + "Name: biography, dtype: object" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.biography.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employment
0000-0002-7397-7977111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan upland]NaNNaNNaN0NaN2020-11-06t06:10:20.070z2020-11-06t06:24:28.005z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0003-4931-9736111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan saratoga]NaNNaNNaN0NaN2020-11-13t01:04:19.859z2020-11-13t01:15:12.546z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0001-8221-2303111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan victorville]NaNNaNNaN0NaN2020-11-05t00:38:21.096z2020-11-05t00:40:40.091z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0001-6736-072X111premium cartitle loanscar title loans are a more straightforward way...NaNNaNNaNNaNNaNNaN0NaN2020-12-08t05:38:30.786z2020-12-08t05:40:03.786z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaNNaNNaNNaN
0000-0002-8727-1246111premium cartitle loanscar title loans are a more straightforward way...[loan agency]NaN[car title loan online, car title loan north o...NaNNaNNaN0NaN2020-12-10t08:54:56.127z2020-12-10t08:57:15.791z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN4.0NaNNaN
.............................................................................................
0000-0002-9640-8136111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan clovis]NaNNaNNaN0NaN2020-10-22t06:11:02.945z2020-10-22t06:17:09.111z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0002-6926-3752111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan escondido]NaNNaNNaN0NaN2020-12-03t02:00:33.684z2020-12-03t02:02:07.054z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0002-3655-4713111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan san rafael]NaNNaNNaN0NaN2020-11-18t00:39:17.492z2020-11-18t00:52:19.024z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0002-8724-1020111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan san juan capistrano]NaNNaNNaN0NaN2020-11-19t00:31:54.080z2020-11-19t00:34:08.721z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
0000-0002-4601-4569111premium cartitle loanscar title loans are a more straightforward way...[premium car title loans]NaN[car title loan mount pleasant]NaNNaNNaN0NaN2020-10-16t00:32:26.207z2020-10-16t00:37:42.646z00000NaNNaN[premiumcartitleloans.com]NaN1.0NaN1.0NaNNaN
\n", + "

421 rows × 30 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0000-0002-7397-7977 1 1 1 \n", + "0000-0003-4931-9736 1 1 1 \n", + "0000-0001-8221-2303 1 1 1 \n", + "0000-0001-6736-072X 1 1 1 \n", + "0000-0002-8727-1246 1 1 1 \n", + "... ... ... ... \n", + "0000-0002-9640-8136 1 1 1 \n", + "0000-0002-6926-3752 1 1 1 \n", + "0000-0002-3655-4713 1 1 1 \n", + "0000-0002-8724-1020 1 1 1 \n", + "0000-0002-4601-4569 1 1 1 \n", + "\n", + " given_names family_name \\\n", + "0000-0002-7397-7977 premium car title loans \n", + "0000-0003-4931-9736 premium car title loans \n", + "0000-0001-8221-2303 premium car title loans \n", + "0000-0001-6736-072X premium car title loans \n", + "0000-0002-8727-1246 premium car title loans \n", + "... ... ... \n", + "0000-0002-9640-8136 premium car title loans \n", + "0000-0002-6926-3752 premium car title loans \n", + "0000-0002-3655-4713 premium car title loans \n", + "0000-0002-8724-1020 premium car title loans \n", + "0000-0002-4601-4569 premium car title loans \n", + "\n", + " biography \\\n", + "0000-0002-7397-7977 car title loans are a more straightforward way... \n", + "0000-0003-4931-9736 car title loans are a more straightforward way... \n", + "0000-0001-8221-2303 car title loans are a more straightforward way... \n", + "0000-0001-6736-072X car title loans are a more straightforward way... \n", + "0000-0002-8727-1246 car title loans are a more straightforward way... \n", + "... ... \n", + "0000-0002-9640-8136 car title loans are a more straightforward way... \n", + "0000-0002-6926-3752 car title loans are a more straightforward way... \n", + "0000-0002-3655-4713 car title loans are a more straightforward way... \n", + "0000-0002-8724-1020 car title loans are a more straightforward way... \n", + "0000-0002-4601-4569 car title loans are a more straightforward way... \n", + "\n", + " other_names primary_email \\\n", + "0000-0002-7397-7977 [premium car title loans] NaN \n", + "0000-0003-4931-9736 [premium car title loans] NaN \n", + "0000-0001-8221-2303 [premium car title loans] NaN \n", + "0000-0001-6736-072X NaN NaN \n", + "0000-0002-8727-1246 [loan agency] NaN \n", + "... ... ... \n", + "0000-0002-9640-8136 [premium car title loans] NaN \n", + "0000-0002-6926-3752 [premium car title loans] NaN \n", + "0000-0002-3655-4713 [premium car title loans] NaN \n", + "0000-0002-8724-1020 [premium car title loans] NaN \n", + "0000-0002-4601-4569 [premium car title loans] NaN \n", + "\n", + " keywords \\\n", + "0000-0002-7397-7977 [car title loan upland] \n", + "0000-0003-4931-9736 [car title loan saratoga] \n", + "0000-0001-8221-2303 [car title loan victorville] \n", + "0000-0001-6736-072X NaN \n", + "0000-0002-8727-1246 [car title loan online, car title loan north o... \n", + "... ... \n", + "0000-0002-9640-8136 [car title loan clovis] \n", + "0000-0002-6926-3752 [car title loan escondido] \n", + "0000-0002-3655-4713 [car title loan san rafael] \n", + "0000-0002-8724-1020 [car title loan san juan capistrano] \n", + "0000-0002-4601-4569 [car title loan mount pleasant] \n", + "\n", + " external_ids education employment n_works works_source \\\n", + "0000-0002-7397-7977 NaN NaN NaN 0 NaN \n", + "0000-0003-4931-9736 NaN NaN NaN 0 NaN \n", + "0000-0001-8221-2303 NaN NaN NaN 0 NaN \n", + "0000-0001-6736-072X NaN NaN NaN 0 NaN \n", + "0000-0002-8727-1246 NaN NaN NaN 0 NaN \n", + "... ... ... ... ... ... \n", + "0000-0002-9640-8136 NaN NaN NaN 0 NaN \n", + "0000-0002-6926-3752 NaN NaN NaN 0 NaN \n", + "0000-0002-3655-4713 NaN NaN NaN 0 NaN \n", + "0000-0002-8724-1020 NaN NaN NaN 0 NaN \n", + "0000-0002-4601-4569 NaN NaN NaN 0 NaN \n", + "\n", + " activation_date last_update_date \\\n", + "0000-0002-7397-7977 2020-11-06t06:10:20.070z 2020-11-06t06:24:28.005z \n", + "0000-0003-4931-9736 2020-11-13t01:04:19.859z 2020-11-13t01:15:12.546z \n", + "0000-0001-8221-2303 2020-11-05t00:38:21.096z 2020-11-05t00:40:40.091z \n", + "0000-0001-6736-072X 2020-12-08t05:38:30.786z 2020-12-08t05:40:03.786z \n", + "0000-0002-8727-1246 2020-12-10t08:54:56.127z 2020-12-10t08:57:15.791z \n", + "... ... ... \n", + "0000-0002-9640-8136 2020-10-22t06:11:02.945z 2020-10-22t06:17:09.111z \n", + "0000-0002-6926-3752 2020-12-03t02:00:33.684z 2020-12-03t02:02:07.054z \n", + "0000-0002-3655-4713 2020-11-18t00:39:17.492z 2020-11-18t00:52:19.024z \n", + "0000-0002-8724-1020 2020-11-19t00:31:54.080z 2020-11-19t00:34:08.721z \n", + "0000-0002-4601-4569 2020-10-16t00:32:26.207z 2020-10-16t00:37:42.646z \n", + "\n", + " n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0000-0002-7397-7977 0 0 0 0 0 \n", + "0000-0003-4931-9736 0 0 0 0 0 \n", + "0000-0001-8221-2303 0 0 0 0 0 \n", + "0000-0001-6736-072X 0 0 0 0 0 \n", + "0000-0002-8727-1246 0 0 0 0 0 \n", + "... ... ... ... ... ... \n", + "0000-0002-9640-8136 0 0 0 0 0 \n", + "0000-0002-6926-3752 0 0 0 0 0 \n", + "0000-0002-3655-4713 0 0 0 0 0 \n", + "0000-0002-8724-1020 0 0 0 0 0 \n", + "0000-0002-4601-4569 0 0 0 0 0 \n", + "\n", + " primary_email_domain other_email_domains \\\n", + "0000-0002-7397-7977 NaN NaN \n", + "0000-0003-4931-9736 NaN NaN \n", + "0000-0001-8221-2303 NaN NaN \n", + "0000-0001-6736-072X NaN NaN \n", + "0000-0002-8727-1246 NaN NaN \n", + "... ... ... \n", + "0000-0002-9640-8136 NaN NaN \n", + "0000-0002-6926-3752 NaN NaN \n", + "0000-0002-3655-4713 NaN NaN \n", + "0000-0002-8724-1020 NaN NaN \n", + "0000-0002-4601-4569 NaN NaN \n", + "\n", + " url_domains n_emails n_urls n_ids \\\n", + "0000-0002-7397-7977 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0003-4931-9736 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0001-8221-2303 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0001-6736-072X [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0002-8727-1246 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "... ... ... ... ... \n", + "0000-0002-9640-8136 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0002-6926-3752 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0002-3655-4713 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0002-8724-1020 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "0000-0002-4601-4569 [premiumcartitleloans.com] NaN 1.0 NaN \n", + "\n", + " n_keywords n_education n_employment \n", + "0000-0002-7397-7977 1.0 NaN NaN \n", + "0000-0003-4931-9736 1.0 NaN NaN \n", + "0000-0001-8221-2303 1.0 NaN NaN \n", + "0000-0001-6736-072X NaN NaN NaN \n", + "0000-0002-8727-1246 4.0 NaN NaN \n", + "... ... ... ... \n", + "0000-0002-9640-8136 1.0 NaN NaN \n", + "0000-0002-6926-3752 1.0 NaN NaN \n", + "0000-0002-3655-4713 1.0 NaN NaN \n", + "0000-0002-8724-1020 1.0 NaN NaN \n", + "0000-0002-4601-4569 1.0 NaN NaN \n", + "\n", + "[421 rows x 30 columns]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[(df.biography.notna()) & (df.biography.str.contains('car title loans are a more straightforward'))]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "def score(bio):\n", + " try:\n", + " return antispam.score(bio)\n", + " except: # if len(bio) < 3 the filter doesn't know how to handle that\n", + " return -1" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "df['spam_score'] = df[df.biography.notna()]['biography'].apply(lambda bio: score(bio))" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidbiography
0000-0003-0505-27341j
0000-0001-7686-10321hi
0000-0002-3417-72991.....
0000-0003-3794-12881m.d., ph.d.
0000-0001-9655-48061肿瘤
.........
0000-0003-3823-26781b.e, m.e. ph.d
0000-0003-4041-08401/
0000-0002-4285-85371
0000-0002-1545-87731hi
0000-0002-6302-42241.
\n", + "

343 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " orcid biography\n", + "0000-0003-0505-2734 1 j\n", + "0000-0001-7686-1032 1 hi\n", + "0000-0002-3417-7299 1 .....\n", + "0000-0003-3794-1288 1 m.d., ph.d.\n", + "0000-0001-9655-4806 1 肿瘤\n", + "... ... ...\n", + "0000-0003-3823-2678 1 b.e, m.e. ph.d\n", + "0000-0003-4041-0840 1 /\n", + "0000-0002-4285-8537 1 \n", + "0000-0002-1545-8773 1 hi\n", + "0000-0002-6302-4224 1 .\n", + "\n", + "[343 rows x 2 columns]" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.spam_score == -1][['orcid','biography']]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "df['spam_score'] = df['spam_score'].replace(-1, np.NaN)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 3.483060e+05\n", + "mean 6.117792e-01\n", + "std 4.472392e-01\n", + "min 1.917500e-22\n", + "25% 1.969077e-02\n", + "50% 9.563239e-01\n", + "75% 9.999993e-01\n", + "max 1.000000e+00\n", + "Name: spam_score, dtype: float64" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.spam_score.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
biographyspam_score
0000-0002-2638-4108investigador de la universidad de oviedo. depa...1.000000
0000-0003-2862-6139formación académica en la temática de manejo d...1.000000
0000-0002-8873-189Xdoctor en educación, maestro en gerencia de la...1.000000
0000-0003-1291-3782possui graduação em psicologia pela pontifícia...1.000000
0000-0002-2263-6646roofing contractors in seattle waroofing contr...1.000000
.........
0000-0002-2606-3849jose ignacio peláez sánchez ha sido profesor e...0.999966
0000-0003-0459-4822mestranda em tecnologia na saúde e foi aluna o...1.000000
0000-0003-0057-1551the phd degree of pharmacy was received under ...1.000000
0000-0002-7878-164Xmostafa metwaly is an assistant lecturer at th...1.000000
0000-0002-6633-0673jual obat aborsi di tangerang, obat penggugur ...0.999999
\n", + "

119552 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " biography \\\n", + "0000-0002-2638-4108 investigador de la universidad de oviedo. depa... \n", + "0000-0003-2862-6139 formación académica en la temática de manejo d... \n", + "0000-0002-8873-189X doctor en educación, maestro en gerencia de la... \n", + "0000-0003-1291-3782 possui graduação em psicologia pela pontifícia... \n", + "0000-0002-2263-6646 roofing contractors in seattle waroofing contr... \n", + "... ... \n", + "0000-0002-2606-3849 jose ignacio peláez sánchez ha sido profesor e... \n", + "0000-0003-0459-4822 mestranda em tecnologia na saúde e foi aluna o... \n", + "0000-0003-0057-1551 the phd degree of pharmacy was received under ... \n", + "0000-0002-7878-164X mostafa metwaly is an assistant lecturer at th... \n", + "0000-0002-6633-0673 jual obat aborsi di tangerang, obat penggugur ... \n", + "\n", + " spam_score \n", + "0000-0002-2638-4108 1.000000 \n", + "0000-0003-2862-6139 1.000000 \n", + "0000-0002-8873-189X 1.000000 \n", + "0000-0003-1291-3782 1.000000 \n", + "0000-0002-2263-6646 1.000000 \n", + "... ... \n", + "0000-0002-2606-3849 0.999966 \n", + "0000-0003-0459-4822 1.000000 \n", + "0000-0003-0057-1551 1.000000 \n", + "0000-0002-7878-164X 1.000000 \n", + "0000-0002-6633-0673 0.999999 \n", + "\n", + "[119552 rows x 2 columns]" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.spam_score > 0.9999][['biography', 'spam_score']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## All VS All correlation" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "coloraxis": "coloraxis", + "hovertemplate": "x: %{x}
y: %{y}
color: %{z}", + "name": "0", + "type": "heatmap", + "x": [ + "orcid", + "verified_email", + "verified_primary_email", + "n_works", + "n_doi", + "n_arxiv", + "n_pmc", + "n_other_pids", + "label", + "n_emails", + "n_urls", + "n_ids", + "n_keywords", + "n_education", + "n_employment", + "spam_score" + ], + "xaxis": "x", + "y": [ + "orcid", + "verified_email", + "verified_primary_email", + "n_works", + "n_doi", + "n_arxiv", + "n_pmc", + "n_other_pids", + "label", + "n_emails", + "n_urls", + "n_ids", + "n_keywords", + "n_education", + "n_employment", + "spam_score" + ], + "yaxis": "y", + "z": [ + [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ], + [ + null, + 1, + 0.965027512571463, + 0.07909450389448938, + 0.07267250617006468, + 0.006476297655163804, + 0.030600690078714833, + 0.06072651274610396, + null, + 0.01724525375901219, + 0.011734321412087068, + 0.08789094997479488, + 0.04349305792976212, + 0.06294096375432985, + 0.03249170979418152, + -0.0009676223108538206 + ], + [ + null, + 0.965027512571463, + 1, + 0.0819319323153257, + 0.07525273852111228, + 0.006701227684123712, + 0.031695842542234315, + 0.06287595850156351, + null, + 0.012922886805318948, + 0.012387715663549998, + 0.08913447886422585, + 0.04319428069032002, + 0.06287478936154348, + 0.03218830236670472, + -0.0012562388439236645 + ], + [ + null, + 0.07909450389448938, + 0.0819319323153257, + 1, + 0.9378826746732684, + 0.3126908705369688, + 0.3510119929598013, + 0.8350663052170557, + null, + 0.04718988564738906, + 0.05653910865456996, + 0.24172815699207165, + 0.10039832975281514, + 0.07841166999115001, + 0.13046589790007565, + 0.03185515400361228 + ], + [ + null, + 0.07267250617006468, + 0.07525273852111228, + 0.9378826746732684, + 1, + 0.35657234546387656, + 0.3621917393246728, + 0.8012635261223445, + null, + 0.043499007216112696, + 0.03647205658234223, + 0.2269939652523629, + 0.08797332017106713, + 0.059729319529628046, + 0.10807896768803292, + 0.022572749381159763 + ], + [ + null, + 0.006476297655163804, + 0.006701227684123712, + 0.3126908705369688, + 0.35657234546387656, + 1, + 0.0009026428265918365, + 0.24215761656047952, + null, + -0.0012865085810765875, + -0.001329117426167316, + 0.0055175608369640175, + 0.005343734662423831, + 0.002858131608668, + 0.012070494908066045, + -0.004264541425264081 + ], + [ + null, + 0.030600690078714833, + 0.031695842542234315, + 0.3510119929598013, + 0.3621917393246728, + 0.0009026428265918365, + 1, + 0.2568594049240261, + null, + 0.007094547284091986, + 0.009274871422764654, + 0.06890564721203653, + 0.04384027185991069, + 0.044227134178966364, + 0.06513883102687293, + 0.04384493133686868 + ], + [ + null, + 0.06072651274610396, + 0.06287595850156351, + 0.8350663052170557, + 0.8012635261223445, + 0.24215761656047952, + 0.2568594049240261, + 1, + null, + 0.03217450452465033, + 0.026308875350291965, + 0.23592717383228326, + 0.07883129209802732, + 0.05208032423018972, + 0.09185353246013575, + 0.026633147020694893 + ], + [ + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null, + null + ], + [ + null, + 0.01724525375901219, + 0.012922886805318948, + 0.04718988564738906, + 0.043499007216112696, + -0.0012865085810765875, + 0.007094547284091986, + 0.03217450452465033, + null, + 1, + 0.10958644107635762, + 0.04522619986981265, + 0.057271130821122146, + 0.042912264959997656, + 0.06989712580810882, + -0.003962136064906193 + ], + [ + null, + 0.011734321412087068, + 0.012387715663549998, + 0.05653910865456996, + 0.03647205658234223, + -0.001329117426167316, + 0.009274871422764654, + 0.026308875350291965, + null, + 0.10958644107635762, + 1, + 0.06946298201611982, + 0.14850020945342837, + 0.09587783320820187, + 0.10097489869640557, + 0.059397185555557654 + ], + [ + null, + 0.08789094997479488, + 0.08913447886422585, + 0.24172815699207165, + 0.2269939652523629, + 0.0055175608369640175, + 0.06890564721203653, + 0.23592717383228326, + null, + 0.04522619986981265, + 0.06946298201611982, + 1, + 0.0821637191798123, + 0.06185375286572581, + 0.10400677096543276, + 0.035508933757786715 + ], + [ + null, + 0.04349305792976212, + 0.04319428069032002, + 0.10039832975281514, + 0.08797332017106713, + 0.005343734662423831, + 0.04384027185991069, + 0.07883129209802732, + null, + 0.057271130821122146, + 0.14850020945342837, + 0.0821637191798123, + 1, + 0.13378013997427662, + 0.15480312032926746, + 0.04131338504782112 + ], + [ + null, + 0.06294096375432985, + 0.06287478936154348, + 0.07841166999115001, + 0.059729319529628046, + 0.002858131608668, + 0.044227134178966364, + 0.05208032423018972, + null, + 0.042912264959997656, + 0.09587783320820187, + 0.06185375286572581, + 0.13378013997427662, + 1, + 0.3541453788931816, + 0.07223966422815224 + ], + [ + null, + 0.03249170979418152, + 0.03218830236670472, + 0.13046589790007565, + 0.10807896768803292, + 0.012070494908066045, + 0.06513883102687293, + 0.09185353246013575, + null, + 0.06989712580810882, + 0.10097489869640557, + 0.10400677096543276, + 0.15480312032926746, + 0.3541453788931816, + 1, + 0.036360329635112675 + ], + [ + null, + -0.0009676223108538206, + -0.0012562388439236645, + 0.03185515400361228, + 0.022572749381159763, + -0.004264541425264081, + 0.04384493133686868, + 0.026633147020694893, + null, + -0.003962136064906193, + 0.059397185555557654, + 0.035508933757786715, + 0.04131338504782112, + 0.07223966422815224, + 0.036360329635112675, + 1 + ] + ] + } + ], + "layout": { + "coloraxis": { + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "constrain": "domain", + "domain": [ + 0, + 1 + ], + "scaleanchor": "y" + }, + "yaxis": { + "anchor": "x", + "autorange": "reversed", + "constrain": "domain", + "domain": [ + 0, + 1 + ] + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.imshow(df.corr())\n", "fig.show()" ] }, @@ -1359,11 +16344,90 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesprimary_emailkeywordsexternal_idseducationemploymentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelprimary_email_domainother_email_domainsurl_domainsn_emailsn_urlsn_idsn_keywordsn_educationn_employmentspam_score
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [orcid, verified_email, verified_primary_email, given_names, family_name, biography, other_names, primary_email, keywords, external_ids, education, employment, n_works, works_source, activation_date, last_update_date, n_doi, n_arxiv, n_pmc, n_other_pids, label, primary_email_domain, other_email_domains, url_domains, n_emails, n_urls, n_ids, n_keywords, n_education, n_employment, spam_score]\n", + "Index: []" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[df.label == 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ - "df[df.label == 1]" + "# (df.n_works > 0) & (df.n_ids > 1)" ] }, { diff --git a/src/data/make_dataset.py b/src/data/make_dataset.py index 297e4a1..3ba2f8e 100644 --- a/src/data/make_dataset.py +++ b/src/data/make_dataset.py @@ -6,6 +6,31 @@ from dotenv import find_dotenv, load_dotenv import pandas as pd import ast import os +import tldextract + +def fix_keywords(lst): + fixed = set() + for k in lst: + tokens = set(k.split(',')) + for t in tokens: + fixed.add(str.strip(t)) + fixed.discard('') + return list(fixed) + +def extract_email_domains(lst): + res = [] + for email in lst: + res.append(email.split('@')[1]) + return res + +def extract_url_domains(lst): + domains = [] + for e in lst: + # e[0] is a string describing the url + # e[1] is the url + domain = tldextract.extract(e[1]) + domains.append(domain.registered_domain) + return domains @click.command() @click.argument('input_filepath', type=click.Path(exists=True)) @@ -17,8 +42,9 @@ def main(input_filepath, output_filepath): logger = logging.getLogger(__name__) logger.info('Making final data set from raw data') logger.info('Loading the zipped dataset') - df = pd.read_csv(os.path.join(input_filepath, 'initial_info_whole_20210322.tsv.gz'), compression='gzip', sep='\t', header=0, - names=['orcid', 'claimed','verified_email', 'verified_primary_email', + df = pd.read_csv(os.path.join(input_filepath, 'data.gz'), compression='gzip', + sep='\t', header=None, + names=['orcid','verified_email', 'verified_primary_email', 'given_names', 'family_name', 'biography', 'other_names', 'urls', 'primary_email', 'other_emails', 'keywords', 'external_ids', 'education', 'employment', 'n_works', 'works_source', 'activation_date', 'last_update_date', @@ -54,6 +80,25 @@ def main(input_filepath, output_filepath): df['label'] = df.orcid.isin(openaire_orcid['orcid']) df['label'] = df['label'].astype(int) + logger.info('Fixing keywords') + df['keywords'] = df[df.keywords.notna()]['keywords'].apply(lambda x: fix_keywords(x)) + + logger.info('Extracting domains from URLs and emails') + df['primary_email_domain'] = df[df.primary_email.notna()]['primary_email'].apply(lambda x: x.split('@')[1]) + df['other_email_domains'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: extract_email_domains(x)) + df['url_domains'] = df[df.urls.notna()]['urls'].apply(lambda x: extract_url_domains(x)) + + logger.info('Creating simple numeric columns') + df['n_emails'] = df.other_emails.str.len() + df['n_urls'] = df.url_domains.str.len() + df['n_ids'] = df.external_ids.str.len() + df['n_keywords'] = df.keywords.str.len() + df['n_education'] = df.education.str.len() + df['n_employment'] = df.employment.str.len() + + logger.info('Dropping useless columns') + df = df.drop(['urls', 'other_emails'], axis=1) + logger.info('Serializing the dataset in ./data/processed') n = 1000000 chunks = [df[i:i+n] for i in range(0, df.shape[0], n)]