{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exploratory analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "TODO:\n", "- Understanding why fake profiles are created can give insight into how to catch them (this could be trivial with prior knowledge, e.g., SEO hacking => URLs)\n", "- Build a taxonomy of cases (e.g., author publishing with an empty ORCID record, author publishing but not on OpenAIRE, etc.)\n", "- Is the temporal dimension of any use?\n", "- Can we access private info thanks to the OpenAIRE-ORCID agreement?\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import ast\n", "import tldextract\n", "import numpy\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "init_notebook_mode(connected=True)\n", "TOP_N = 30\n", "TOP_RANGE = [-.5, TOP_N - 1 + .5]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Notable genuine ORCID iDs for exploratory purposes:" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "AM = '0000-0002-5193-7851'\n", "PP = '0000-0002-8588-4196'\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Anomalous ORCID profiles:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "JOURNAL = '0000-0003-1815-5732'\n", "NOINFO= '0000-0001-5009-2052'\n", "# todo: find group-shared ORCiD, if possible" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Notable fake ORCID iDs for exploratory purposes:" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "SCAFFOLD = '0000-0001-5004-7761'\n", "WHATSAPP = '0000-0001-6997-9470'\n", "PENIS = '0000-0002-3399-7287'\n", "BITCOIN = '0000-0002-7518-6845'\n",
"FITNESS_CHINA = '0000-0002-1234-835X' # URL record + employment\n", "CANNABIS = '0000-0002-9025-8632' # URL > 70 + works (REMOVED)\n", "PLUMBER = '0000-0002-1700-8311' # URL > 10 + works " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load the dataset" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../data/raw/initial_info_whole.tsv', sep='\\t', header=0,\n", " names = ['orcid', 'claimed','verified_email', 'verified_primary_email', \n", " 'given_names', 'family_name', 'biography', 'other_names', 'urls', \n", " 'primary_email', 'other_emails', 'keywords', 'external_ids', 'education', \n", " 'employment', 'n_works', 'works_source'])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
75520000-0001-7831-7567111VahabVahdatNaNNaNNaNNaNNaNNaN[[\"Scopus Author ID\", \"57193490305\"], [\"Scopus...[[\"Industrial Engineering\", \"PhD\", \"Northeaste...[[\"Post-doctorate fellow\", \"Harvard Medical Sc...25[\"Vahab Vahdat\", \"Scopus - Elsevier\", \"Multidi...
84160000-0001-8161-1345111AYFERTEKIN ATACANNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN
164980000-0002-1133-1505111XianrongLaiNaNNaNNaNNaNNaNNaN[[\"Scopus Author ID\", \"15769435500\"]][[\"Department of pharmacy\", \"Bachelor of Tradi...[[\"Associate Research, Professor\", \"Chengdu Un...115[\"Xianrong Lai\", \"Scopus - Elsevier\", \"Crossref\"]
168300000-0002-1257-5536111AlexandraZimmerNaNNaNNaNNaNNaNNaNNaNNaN[[\"Research assistent\", \"Fraunhofer-Institut f...0NaN
188350000-0002-2026-4156111FatmaSri WahyuniNaN[\"Ayu\"]NaNNaNNaNNaN[[\"ResearcherID\", \"C-5194-2015\"], [\"Scopus Aut...[[\"Biosains\", \"PHD\", \"Universiti Putra Malaysi...[[\"Lecturer\", \"Universitas Andalas\", \"Padang\",...27[\"Publons\", \"Crossref Metadata Search\", \"Scopu...
......................................................
107332930000-0002-9887-7788111MarkétaLaštůvkováNaNNaNNaNNaNNaNNaNNaNNaN[[\"\", \"VSB - Technical University of Ostrava\",...0NaN
107372580000-0003-1367-8104111LORENAGUTIÉRREZ GARCÍANaNNaN[[\"LinkedIn\", \"https://www.linkedin.com/in/lor...lorenagg@unex.esNaN[\"Agroecolog\\u00eda, Bot\\u00e1nica, Did\\u00e1c...[[\"ResearcherID\", \"AAE-6316-2021\"]][[\"\", \"M\\u00e1ster en Formaci\\u00f3n del profe...[[\"PCI\", \"Universidad de Extremadura - Campus ...14[\"Multidisciplinary Digital Publishing Institu...
107383080000-0003-1741-3437111XingLiuNaNNaNNaNNaNNaNNaN[[\"ResearcherID\", \"S-3053-2017\"]]NaNNaN0NaN
107414600000-0003-2909-8585111YusufÖzcanNaNNaNNaNNaNNaNNaNNaN[[\"\\u0130lahiyat Fak\\u00fcltesi\", \"Doktora\", \"...[[\"Research Assistant\", \"\\u00c7ukurova Univers...0NaN
107450780000-0003-4259-5324111P Rama MohanNaNNaNNaNNaNNaNNaNNaN[[\"Scopus Author ID\", \"24776757000\"]][[\"EEE Department\", \"Ph.D. (Power Electronics ...[[\"Associate Professor\", \"RGM College of Engin...21[\"Scopus - Elsevier\", \"P Rama Mohan\"]
\n", "

2418 rows × 17 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "7552 0000-0001-7831-7567 1 1 \n", "8416 0000-0001-8161-1345 1 1 \n", "16498 0000-0002-1133-1505 1 1 \n", "16830 0000-0002-1257-5536 1 1 \n", "18835 0000-0002-2026-4156 1 1 \n", "... ... ... ... \n", "10733293 0000-0002-9887-7788 1 1 \n", "10737258 0000-0003-1367-8104 1 1 \n", "10738308 0000-0003-1741-3437 1 1 \n", "10741460 0000-0003-2909-8585 1 1 \n", "10745078 0000-0003-4259-5324 1 1 \n", "\n", " verified_primary_email given_names family_name biography \\\n", "7552 1 Vahab Vahdat NaN \n", "8416 1 AYFER TEKIN ATACAN NaN \n", "16498 1 Xianrong Lai NaN \n", "16830 1 Alexandra Zimmer NaN \n", "18835 1 Fatma Sri Wahyuni NaN \n", "... ... ... ... ... \n", "10733293 1 Markéta Laštůvková NaN \n", "10737258 1 LORENA GUTIÉRREZ GARCÍA NaN \n", "10738308 1 Xing Liu NaN \n", "10741460 1 Yusuf Özcan NaN \n", "10745078 1 P Rama Mohan NaN NaN \n", "\n", " other_names urls \\\n", "7552 NaN NaN \n", "8416 NaN NaN \n", "16498 NaN NaN \n", "16830 NaN NaN \n", "18835 [\"Ayu\"] NaN \n", "... ... ... \n", "10733293 NaN NaN \n", "10737258 NaN [[\"LinkedIn\", \"https://www.linkedin.com/in/lor... \n", "10738308 NaN NaN \n", "10741460 NaN NaN \n", "10745078 NaN NaN \n", "\n", " primary_email other_emails \\\n", "7552 NaN NaN \n", "8416 NaN NaN \n", "16498 NaN NaN \n", "16830 NaN NaN \n", "18835 NaN NaN \n", "... ... ... \n", "10733293 NaN NaN \n", "10737258 lorenagg@unex.es NaN \n", "10738308 NaN NaN \n", "10741460 NaN NaN \n", "10745078 NaN NaN \n", "\n", " keywords \\\n", "7552 NaN \n", "8416 NaN \n", "16498 NaN \n", "16830 NaN \n", "18835 NaN \n", "... ... \n", "10733293 NaN \n", "10737258 [\"Agroecolog\\u00eda, Bot\\u00e1nica, Did\\u00e1c... \n", "10738308 NaN \n", "10741460 NaN \n", "10745078 NaN \n", "\n", " external_ids \\\n", "7552 [[\"Scopus Author ID\", \"57193490305\"], [\"Scopus... 
\n", "8416 NaN \n", "16498 [[\"Scopus Author ID\", \"15769435500\"]] \n", "16830 NaN \n", "18835 [[\"ResearcherID\", \"C-5194-2015\"], [\"Scopus Aut... \n", "... ... \n", "10733293 NaN \n", "10737258 [[\"ResearcherID\", \"AAE-6316-2021\"]] \n", "10738308 [[\"ResearcherID\", \"S-3053-2017\"]] \n", "10741460 NaN \n", "10745078 [[\"Scopus Author ID\", \"24776757000\"]] \n", "\n", " education \\\n", "7552 [[\"Industrial Engineering\", \"PhD\", \"Northeaste... \n", "8416 NaN \n", "16498 [[\"Department of pharmacy\", \"Bachelor of Tradi... \n", "16830 NaN \n", "18835 [[\"Biosains\", \"PHD\", \"Universiti Putra Malaysi... \n", "... ... \n", "10733293 NaN \n", "10737258 [[\"\", \"M\\u00e1ster en Formaci\\u00f3n del profe... \n", "10738308 NaN \n", "10741460 [[\"\\u0130lahiyat Fak\\u00fcltesi\", \"Doktora\", \"... \n", "10745078 [[\"EEE Department\", \"Ph.D. (Power Electronics ... \n", "\n", " employment n_works \\\n", "7552 [[\"Post-doctorate fellow\", \"Harvard Medical Sc... 25 \n", "8416 NaN 0 \n", "16498 [[\"Associate Research, Professor\", \"Chengdu Un... 115 \n", "16830 [[\"Research assistent\", \"Fraunhofer-Institut f... 0 \n", "18835 [[\"Lecturer\", \"Universitas Andalas\", \"Padang\",... 27 \n", "... ... ... \n", "10733293 [[\"\", \"VSB - Technical University of Ostrava\",... 0 \n", "10737258 [[\"PCI\", \"Universidad de Extremadura - Campus ... 14 \n", "10738308 NaN 0 \n", "10741460 [[\"Research Assistant\", \"\\u00c7ukurova Univers... 0 \n", "10745078 [[\"Associate Professor\", \"RGM College of Engin... 21 \n", "\n", " works_source \n", "7552 [\"Vahab Vahdat\", \"Scopus - Elsevier\", \"Multidi... \n", "8416 NaN \n", "16498 [\"Xianrong Lai\", \"Scopus - Elsevier\", \"Crossref\"] \n", "16830 NaN \n", "18835 [\"Publons\", \"Crossref Metadata Search\", \"Scopu... \n", "... ... \n", "10733293 NaN \n", "10737258 [\"Multidisciplinary Digital Publishing Institu... 
\n", "10738308 NaN \n", "10741460 NaN \n", "10745078 [\"Scopus - Elsevier\", \"P Rama Mohan\"] \n", "\n", "[2418 rows x 17 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.duplicated()]" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df.drop_duplicates(inplace=True)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Basic column manipulation (interpret columns as lists when necessary)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Columns stored in the TSV as serialized lists (or lists of lists).\n",
"LIST_COLUMNS = ['other_names', 'keywords', 'urls', 'other_emails',\n",
"                'education', 'employment', 'external_ids', 'works_source']\n",
"\n",
"\n",
"def parse_list_literal(value):\n",
"    \"\"\"Evaluate a serialized list with ast.literal_eval; return non-strings unchanged.\n",
"\n",
"    Values that are already parsed (lists) or missing (NaN) are passed through,\n",
"    so re-running this cell on a converted frame does not raise.\n",
"    \"\"\"\n",
"    return ast.literal_eval(value) if isinstance(value, str) else value\n",
"\n",
"\n",
"for column in LIST_COLUMNS:\n",
"    # dropna() keeps the Python-level call off the (mostly empty) rows; assigning\n",
"    # the shorter result back re-aligns on the index and leaves NaN elsewhere.\n",
"    df[column] = df[column].dropna().map(parse_list_literal)"
] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
00000-0001-5000-2053100JorgeJaramillo SanchezNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN
10000-0001-5000-6548100WisemanBekelesiNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN
20000-0001-5000-7962111ALICEINDIMULINaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN
30000-0001-5000-8586100shimji yunNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN
40000-0001-5001-0256100SandroCaramaschiNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaN
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "0 0000-0001-5000-2053 1 0 0 \n", "1 0000-0001-5000-6548 1 0 0 \n", "2 0000-0001-5000-7962 1 1 1 \n", "3 0000-0001-5000-8586 1 0 0 \n", "4 0000-0001-5001-0256 1 0 0 \n", "\n", " given_names family_name biography other_names urls primary_email \\\n", "0 Jorge Jaramillo Sanchez NaN NaN NaN NaN \n", "1 Wiseman Bekelesi NaN NaN NaN NaN \n", "2 ALICE INDIMULI NaN NaN NaN NaN \n", "3 shim ji yun NaN NaN NaN NaN \n", "4 Sandro Caramaschi NaN NaN NaN NaN \n", "\n", " other_emails keywords external_ids education employment n_works \\\n", "0 NaN NaN NaN NaN NaN 0 \n", "1 NaN NaN NaN NaN NaN 0 \n", "2 NaN NaN NaN NaN NaN 0 \n", "3 NaN NaN NaN NaN NaN 0 \n", "4 NaN NaN NaN NaN NaN 0 \n", "\n", " works_source \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview the first five profiles (head() defaults to five rows).\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
88404130000-0002-5193-7851111AndreaMannocciNaNNaN[[Personal website, https://andremann.github.i...andrea.mannocci@isti.cnr.itNaN[Data science , science of science, scholarly ...[[Scopus Author ID, 55233589900]][[Information engineering, Ph.D., Università d...[[Research Associate, Istituto di Scienza e Te...37[Scopus - Elsevier, Crossref Metadata Search, ...
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "8840413 0000-0002-5193-7851 1 1 1 \n", "\n", " given_names family_name biography other_names \\\n", "8840413 Andrea Mannocci NaN NaN \n", "\n", " urls \\\n", "8840413 [[Personal website, https://andremann.github.i... \n", "\n", " primary_email other_emails \\\n", "8840413 andrea.mannocci@isti.cnr.it NaN \n", "\n", " keywords \\\n", "8840413 [Data science , science of science, scholarly ... \n", "\n", " external_ids \\\n", "8840413 [[Scopus Author ID, 55233589900]] \n", "\n", " education \\\n", "8840413 [[Information engineering, Ph.D., Università d... \n", "\n", " employment n_works \\\n", "8840413 [[Research Associate, Istituto di Scienza e Te... 37 \n", "\n", " works_source \n", "8840413 [Scopus - Elsevier, Crossref Metadata Search, ... " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Record for the AM reference iD defined with the notebook constants.\n", "df.loc[df['orcid'].eq(AM)]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
95170990000-0001-6997-9470111otherwhatsappNaNNaN[[Otherwhatsapp, https://otherwhatsapp.com/], ...NaNNaN[Whatsapp GB, whatsapp gb 2020, whatsapp gb ba...NaNNaNNaN0NaN
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "9517099 0000-0001-6997-9470 1 1 1 \n", "\n", " given_names family_name biography other_names \\\n", "9517099 other whatsapp NaN NaN \n", "\n", " urls primary_email \\\n", "9517099 [[Otherwhatsapp, https://otherwhatsapp.com/], ... NaN \n", "\n", " other_emails keywords \\\n", "9517099 NaN [Whatsapp GB, whatsapp gb 2020, whatsapp gb ba... \n", "\n", " external_ids education employment n_works works_source \n", "9517099 NaN NaN NaN 0 NaN " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Record for the WHATSAPP iD defined with the notebook constants.\n", "df.loc[df['orcid'].eq(WHATSAPP)]" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "orcid 10744622\n", "claimed 10744622\n", "verified_email 10744622\n", "verified_primary_email 10744622\n", "given_names 10716789\n", "family_name 10437094\n", "biography 333885\n", "other_names 544550\n", "urls 688262\n", "primary_email 121476\n", "other_emails 47470\n", "keywords 638634\n", "external_ids 1285292\n", "education 2402440\n", "employment 2626670\n", "n_works 10744622\n", "works_source 2671906\n", "dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of non-missing values per column.\n", "df.notna().sum()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
45952630000-0002-5154-6404111OlusolaBamisileNaNNaNNaNNaNNaNNaNNaN[[Energy Systems Engineering , Doctoral, Cypru...[[, University of Electronic Science and Techn...3[Multidisciplinary Digital Publishing Institut...
45952640000-0002-5154-6404111OlusolaBamisileNaNNaNNaNNaNNaNNaNNaN[[Energy Systems Engineering , Doctoral, Cypru...[[, University of Electronic Science and Techn...2[Crossref]
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "4595263 0000-0002-5154-6404 1 1 1 \n", "4595264 0000-0002-5154-6404 1 1 1 \n", "\n", " given_names family_name biography other_names urls primary_email \\\n", "4595263 Olusola Bamisile NaN NaN NaN NaN \n", "4595264 Olusola Bamisile NaN NaN NaN NaN \n", "\n", " other_emails keywords external_ids \\\n", "4595263 NaN NaN NaN \n", "4595264 NaN NaN NaN \n", "\n", " education \\\n", "4595263 [[Energy Systems Engineering , Doctoral, Cypru... \n", "4595264 [[Energy Systems Engineering , Doctoral, Cypru... \n", "\n", " employment n_works \\\n", "4595263 [[, University of Electronic Science and Techn... 3 \n", "4595264 [[, University of Electronic Science and Techn... 2 \n", "\n", " works_source \n", "4595263 [Multidisciplinary Digital Publishing Institut... \n", "4595264 [Crossref] " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Show every row whose ORCID iD occurs more than once (both copies of each).\n",
"df[df['orcid'].duplicated(keep=False)]" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [
"# Keep the first row per ORCID iD. Deduplicating by key instead of dropping a\n",
"# hard-coded row label keeps this cell valid if the TSV is regenerated, and\n",
"# makes it safe to re-run (a second run is a no-op rather than a KeyError).\n",
"df = df.drop_duplicates(subset='orcid', keep='first')\n",
"assert df['orcid'].is_unique" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 10744621\n", "unique 10744621\n", "top 0000-0002-3936-2047\n", "freq 1\n", "Name: orcid, dtype: object" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['orcid'].describe()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Primary email" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 121476\n", "unique 121473\n", "top opercin@erbakan.edu.tr\n", "freq 2\n", "Name: primary_email, dtype: object" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['primary_email'].describe()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Dupe emails" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "7483666 maykin@owasp.org\n", "9068234 opercin@erbakan.edu.tr\n", "10246485 patrick.davey@monash.edu\n", "Name: primary_email, dtype: object" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Second and later occurrences of each non-missing primary email. Both masks are\n",
"# built on df's own index, so no implicit re-alignment of the indexer is needed.\n",
"df.loc[df['primary_email'].notna() & df['primary_email'].duplicated(), 'primary_email']" ] },
{ "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
37763500000-0002-0836-2271111MaykinWarasartNaNNaNNaNmaykin@owasp.org[maykin@dga.or.th]NaNNaNNaNNaN0NaN
74836660000-0001-9855-1676111MaykinWarasartNaNNaNNaNmaykin@owasp.org[maykin@dga.or.th, maykin@ieee.org]NaNNaNNaNNaN0NaN
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "3776350 0000-0002-0836-2271 1 1 1 \n", "7483666 0000-0001-9855-1676 1 1 1 \n", "\n", " given_names family_name biography other_names urls primary_email \\\n", "3776350 Maykin Warasart NaN NaN NaN maykin@owasp.org \n", "7483666 Maykin Warasart NaN NaN NaN maykin@owasp.org \n", "\n", " other_emails keywords external_ids education \\\n", "3776350 [maykin@dga.or.th] NaN NaN NaN \n", "7483666 [maykin@dga.or.th, maykin@ieee.org] NaN NaN NaN \n", "\n", " employment n_works works_source \n", "3776350 NaN 0 NaN \n", "7483666 NaN 0 NaN " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Profiles sharing the first duplicated primary email.\n", "df.loc[df['primary_email'].eq('maykin@owasp.org')]" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
39950320000-0002-2232-9638111OsmanPerçinNaNNaNNaNopercin@erbakan.edu.trNaNNaNNaNNaNNaN0NaN
90682340000-0003-0033-0918111OsmanPERÇİNNaNNaNNaNopercin@erbakan.edu.trNaNNaNNaNNaN[[, Necmettin Erbakan University, Konya, , TR,...0NaN
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "3995032 0000-0002-2232-9638 1 1 1 \n", "9068234 0000-0003-0033-0918 1 1 1 \n", "\n", " given_names family_name biography other_names urls \\\n", "3995032 Osman Perçin NaN NaN NaN \n", "9068234 Osman PERÇİN NaN NaN NaN \n", "\n", " primary_email other_emails keywords external_ids education \\\n", "3995032 opercin@erbakan.edu.tr NaN NaN NaN NaN \n", "9068234 opercin@erbakan.edu.tr NaN NaN NaN NaN \n", "\n", " employment n_works \\\n", "3995032 NaN 0 \n", "9068234 [[, Necmettin Erbakan University, Konya, , TR,... 0 \n", "\n", " works_source \n", "3995032 NaN \n", "9068234 NaN " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Profiles sharing the second duplicated primary email.\n", "df.loc[df['primary_email'].eq('opercin@erbakan.edu.tr')]" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_source
50877450000-0002-8774-0030111PatrickDaveyNaNNaNNaNpatrick.davey@monash.eduNaNNaNNaNNaN[[PhD Student, Monash University, Melbourne, V...1[Crossref]
102464850000-0002-9158-1757111PatrickDaveyNaNNaNNaNpatrick.davey@monash.eduNaN[Radiopharmaceuticals, Inorganic Chemistry, Bi...NaNNaN[[PhD Student, Monash University, Melbourne, ,...0NaN
\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "5087745 0000-0002-8774-0030 1 1 \n", "10246485 0000-0002-9158-1757 1 1 \n", "\n", " verified_primary_email given_names family_name biography \\\n", "5087745 1 Patrick Davey NaN \n", "10246485 1 Patrick Davey NaN \n", "\n", " other_names urls primary_email other_emails \\\n", "5087745 NaN NaN patrick.davey@monash.edu NaN \n", "10246485 NaN NaN patrick.davey@monash.edu NaN \n", "\n", " keywords external_ids \\\n", "5087745 NaN NaN \n", "10246485 [Radiopharmaceuticals, Inorganic Chemistry, Bi... NaN \n", "\n", " education employment \\\n", "5087745 NaN [[PhD Student, Monash University, Melbourne, V... \n", "10246485 NaN [[PhD Student, Monash University, Melbourne, ,... \n", "\n", " n_works works_source \n", "5087745 1 [Crossref] \n", "10246485 0 NaN " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['primary_email'] == 'patrick.davey@monash.edu']" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [
"# Domain = the text after the last '@'. The vectorised extract returns NaN for\n",
"# missing emails and for values that contain no '@' (or nothing after it),\n",
"# where a plain split('@')[1] would raise IndexError or pick the wrong piece.\n",
"df['primary_email_domain'] = df['primary_email'].str.extract(r'@([^@]+)$', expand=False)" ] },
{ "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 121476\n", "unique 17047\n", "top gmail.com\n", "freq 25892\n", "Name: primary_email_domain, dtype: object" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['primary_email_domain'].describe()" ] },
{ "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcid
primary_email_domain
gmail.com25892
hotmail.com3674
yahoo.com2578
163.com2067
yuhs.ac1124
......
iiap.gob.pe1
iiap.org.pe1
iibb.csic.es1
iic.hokudai.ac.jp1
zzuli.edu.cn1
\n", "

17047 rows × 1 columns

\n", "
" ], "text/plain": [ " orcid\n", "primary_email_domain \n", "gmail.com 25892\n", "hotmail.com 3674\n", "yahoo.com 2578\n", "163.com 2067\n", "yuhs.ac 1124\n", "... ...\n", "iiap.gob.pe 1\n", "iiap.org.pe 1\n", "iibb.csic.es 1\n", "iic.hokudai.ac.jp 1\n", "zzuli.edu.cn 1\n", "\n", "[17047 rows x 1 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Profiles per primary-email domain, most frequent first.\n",
"primary_emails = (\n",
"    df.groupby('primary_email_domain')[['orcid']]\n",
"    .count()\n",
"    .sort_values('orcid', ascending=False)\n",
")\n",
"primary_emails" ] },
{ "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "gmail.com", "hotmail.com", "yahoo.com", "163.com", "yuhs.ac", "qq.com", "outlook.com", "126.com", "bu.edu", "usgs.gov", "mail.ru", "usp.br", "yahoo.com.br", "ua.pt", "umich.edu", "ust.hk", "foxmail.com", "uomustansiriyah.edu.iq", "yandex.ru", "uq.edu.au", "ukr.net", "unesp.br", "ucl.ac.uk", "ieee.org", "stcatz.ox.ac.uk", "st-annes.ox.ac.uk", "naver.com", "yahoo.fr", "ucm.es", "live.com" ], "y": [ 25892, 3674, 2578, 2067, 1124, 1035, 914, 755, 626, 584, 564, 455, 454, 291, 290, 278, 249, 242, 237, 234, 220, 214, 204, 203, 185, 184, 182, 172, 166, 159 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ],
"choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], 
"histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 
0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", 
"radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 email domains" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# primary_emails is already ordered by descending count, so its first TOP_N rows are the most frequent domains.\n", "# Slice once and reuse the result for both axes instead of slicing and sorting twice.\n", "top_primary_emails = primary_emails[:TOP_N].sort_values(by=['orcid'], ascending=False)\n", "\n", "data = [\n", "    go.Bar(\n", "        x=top_primary_emails.index,\n", "        y=top_primary_emails['orcid']\n", "    )\n", "]\n", "\n", "layout = go.Layout(\n", "    # TOP_RANGE is derived from TOP_N, so the slice and the title have to follow TOP_N as well.\n", "    title='Top {} email domains'.format(TOP_N),\n", "    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Other emails" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "def extract_email_domains(lst):\n", "    \"\"\"Return the domain of every e-mail address in ``lst``.\n", "\n", "    The domain is whatever follows the last '@': a quoted local part may itself\n", "    contain '@', so splitting on the first one would return the wrong piece.\n", "    Entries that are not strings or that contain no '@' are skipped instead of\n", "    raising IndexError, so one malformed address cannot abort the whole column.\n", "    \"\"\"\n", "    return [\n", "        email.rpartition('@')[2]\n", "        for email in lst\n", "        if isinstance(email, str) and '@' in email\n", "    ]" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# Only list values are converted; any other value (e.g. NaN) is passed through unchanged.\n", "df['other_email_domains'] = df['other_emails'].apply(lambda x: extract_email_domains(x) if isinstance(x, list) else x)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domains
340000-0001-5011-9833111MarkKilbaneNaNNaNNaNmark.kilbane@seh.ox.ac.uk[mark.kilbane@bsg.ox.ac.uk]NaNNaN[[Blavatnik School of Government; St Edmund Ha...NaN0NaNseh.ox.ac.uk[bsg.ox.ac.uk]
470000-0001-5017-1295111XinfengTangNaNNaNNaNNaN[tang.xinfeng@foxmail.com]NaN[[Scopus Author ID, 56927186900]][[, , University of Hong Kong, Hong Kong, , HK...NaN11[Scopus - Elsevier, Xinfeng Tang]NaN[foxmail.com]
2990000-0001-5109-3989111colintysallNaNNaNNaNNaN[colin.tysall@nhs.net]NaNNaNNaN[[Associate Mental Health Act Manager, Coventr...0NaNNaN[nhs.net]
8680000-0001-5320-1277111GökhanKESKİNNaNNaNNaN2012001598@stu.adu.edu.tr[gokhankkeskin@gmail.com]NaNNaNNaN[[, Adnan Menderes University, Aydin, , TR, gr...0NaNstu.adu.edu.tr[gmail.com]
11760000-0001-5434-9994111ElenaBorucuNaNNaNNaNlenapasali@gmail.com[epasali@yildiz.edu.tr]NaNNaNNaNNaN0NaNgmail.com[yildiz.edu.tr]
\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "34 0000-0001-5011-9833 1 1 1 \n", "47 0000-0001-5017-1295 1 1 1 \n", "299 0000-0001-5109-3989 1 1 1 \n", "868 0000-0001-5320-1277 1 1 1 \n", "1176 0000-0001-5434-9994 1 1 1 \n", "\n", " given_names family_name biography other_names urls \\\n", "34 Mark Kilbane NaN NaN NaN \n", "47 Xinfeng Tang NaN NaN NaN \n", "299 colin tysall NaN NaN NaN \n", "868 Gökhan KESKİN NaN NaN NaN \n", "1176 Elena Borucu NaN NaN NaN \n", "\n", " primary_email other_emails keywords \\\n", "34 mark.kilbane@seh.ox.ac.uk [mark.kilbane@bsg.ox.ac.uk] NaN \n", "47 NaN [tang.xinfeng@foxmail.com] NaN \n", "299 NaN [colin.tysall@nhs.net] NaN \n", "868 2012001598@stu.adu.edu.tr [gokhankkeskin@gmail.com] NaN \n", "1176 lenapasali@gmail.com [epasali@yildiz.edu.tr] NaN \n", "\n", " external_ids \\\n", "34 NaN \n", "47 [[Scopus Author ID, 56927186900]] \n", "299 NaN \n", "868 NaN \n", "1176 NaN \n", "\n", " education \\\n", "34 [[Blavatnik School of Government; St Edmund Ha... \n", "47 [[, , University of Hong Kong, Hong Kong, , HK... \n", "299 NaN \n", "868 NaN \n", "1176 NaN \n", "\n", " employment n_works \\\n", "34 NaN 0 \n", "47 NaN 11 \n", "299 [[Associate Mental Health Act Manager, Coventr... 0 \n", "868 [[, Adnan Menderes University, Aydin, , TR, gr... 
0 \n", "1176 NaN 0 \n", "\n", " works_source primary_email_domain \\\n", "34 NaN seh.ox.ac.uk \n", "47 [Scopus - Elsevier, Xinfeng Tang] NaN \n", "299 NaN NaN \n", "868 NaN stu.adu.edu.tr \n", "1176 NaN gmail.com \n", "\n", " other_email_domains \n", "34 [bsg.ox.ac.uk] \n", "47 [foxmail.com] \n", "299 [nhs.net] \n", "868 [gmail.com] \n", "1176 [yildiz.edu.tr] " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Peek at the first profiles that actually have other e-mail domains.\n", "df.loc[df['other_email_domains'].notna()].head()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# Length of each profile's other_emails value (the primary e-mail is not counted).\n", "df['n_emails'] = df['other_emails'].str.len()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidn_emails
20397180000-0003-4171-383512.0
571980000-0001-6239-29689.0
105245090000-0003-2290-28177.0
77852160000-0003-2151-40897.0
35563860000-0001-9084-31566.0
.........
107470350000-0003-4998-1551NaN
107470360000-0003-4998-4111NaN
107470370000-0003-4998-6045NaN
107470380000-0003-4998-8868NaN
107470390000-0003-4999-7916NaN
\n", "

10744621 rows × 2 columns

\n", "
" ], "text/plain": [ " orcid n_emails\n", "2039718 0000-0003-4171-3835 12.0\n", "57198 0000-0001-6239-2968 9.0\n", "10524509 0000-0003-2290-2817 7.0\n", "7785216 0000-0003-2151-4089 7.0\n", "3556386 0000-0001-9084-3156 6.0\n", "... ... ...\n", "10747035 0000-0003-4998-1551 NaN\n", "10747036 0000-0003-4998-4111 NaN\n", "10747037 0000-0003-4998-6045 NaN\n", "10747038 0000-0003-4998-8868 NaN\n", "10747039 0000-0003-4999-7916 NaN\n", "\n", "[10744621 rows x 2 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Profiles ranked by n_emails, highest first.\n", "df[['orcid', 'n_emails']].sort_values('n_emails', ascending=False)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# One row per (profile, other e-mail domain) pair, then count the rows of each domain.\n", "grouped_other_emails = (\n", "    df[['orcid', 'other_email_domains']]\n", "    .explode('other_email_domains')\n", "    .reset_index(drop=True)\n", "    .groupby('other_email_domains')\n", "    .count()\n", "    .sort_values('orcid', ascending=False)\n", ")" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "gmail.com", "hotmail.com", "yahoo.com", "163.com", "qq.com", "outlook.com", "126.com", "usp.br", "ieee.org", "mail.ru", "yahoo.com.br", "unesp.br", "sbs.ox.ac.uk", "yuhs.ac", "naver.com", "icloud.com", "ua.pt", "uq.edu.au", "foxmail.com", "cam.ac.uk", "ukr.net", "law.ox.ac.uk", "imperial.ac.uk", "mit.edu", "monash.edu", "ucl.ac.uk", "education.ox.ac.uk", "stanford.edu", "ucm.es", "conted.ox.ac.uk" ], "y": [ 10856, 1521, 1263, 763, 755, 422, 256, 235, 223, 147, 146, 138, 136, 130, 128, 113, 92, 90, 90, 81, 76, 75, 75, 74, 69, 67, 67, 66, 65, 64 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { 
"marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], 
"scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": 
{ "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 other email domains" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# grouped_other_emails is already ordered by descending count, so its first TOP_N rows are the most frequent domains.\n", "# Slice once and reuse the result for both axes instead of slicing and sorting twice.\n", "top_other_emails = grouped_other_emails[:TOP_N].sort_values(by=['orcid'], ascending=False)\n", "\n", "data = [\n", "    go.Bar(\n", "        x=top_other_emails.index,\n", "        y=top_other_emails['orcid']\n", "    )\n", "]\n", "\n", "layout = go.Layout(\n", "    # TOP_RANGE is derived from TOP_N, so the slice and the title have to follow TOP_N as well.\n", "    title='Top {} other email domains'.format(TOP_N),\n", "    xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Email speculation" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emailskeywordsexternal_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emails
470000-0001-5017-1295111XinfengTangNaNNaNNaNNaN[tang.xinfeng@foxmail.com]NaN[[Scopus Author ID, 56927186900]][[, , University of Hong Kong, Hong Kong, , HK...NaN11[Scopus - Elsevier, Xinfeng Tang]NaN[foxmail.com]1.0
2990000-0001-5109-3989111colintysallNaNNaNNaNNaN[colin.tysall@nhs.net]NaNNaNNaN[[Associate Mental Health Act Manager, Coventr...0NaNNaN[nhs.net]1.0
12960000-0001-5476-0126111Aura WindyHernández CetinaNaNNaNNaNNaN[u0902038@unimilitar.edu.co]NaNNaN[[, Profesional en Relaciones Internacionales ...[[Asistente de Investigación, Pontificia Unive...1[Aura Windy Hernández Cetina]NaN[unimilitar.edu.co]1.0
14290000-0001-5522-427X111SüleymanÖzenNaNNaN[[Academic CV, https://akademik.yok.gov.tr/Aka...NaN[suleyman.ozen@btu.edu.tr][construction materials, superplasticizers, co...[[Scopus Author ID, 57188750603]][[Civil Engineering, MSc and PhD, Uludağ Unive...[[Dr., Bursa Technical University, Bursa, , TR...7[Scopus - Elsevier, Crossref]NaN[btu.edu.tr]1.0
16280000-0001-5597-3115111WadeHarrisonNaNNaNNaNNaN[wade_harrison@unc.edu]NaNNaN[[, MD, Dartmouth College Geisel School of Med...[[Clinical Instructor / Research Fellow, Unive...7[Wade Harrison]NaN[unc.edu]1.0
...............................................................
107436580000-0003-3740-8352111RuiZhangNaNNaNNaNNaN[zhang-r15@mails.tsinghua.edu.cn][Lithium metal batteries, Graphene][[ResearcherID, B-3843-2015]][[Department of Chemical Engineering, Ph.D. st...NaN15[ResearcherID, Crossref]NaN[mails.tsinghua.edu.cn]1.0
107448760000-0003-4192-6451111Sanjib RajPandeyNaNNaN[[Personal, https://www.sanjibpandey.wix.com/p...NaN[srpandey@gmail.com]NaNNaN[[Computing and Information System, PhD, Unive...[[Software Developer & Research Associate, Oxl...11[BASE - Bielefeld Academic Search Engine, Dr. ...NaN[gmail.com]1.0
107452740000-0003-4333-9728111MarioDe la Fuente LloredaPerson in charge to coordinate the scientific ...[M.de la Fuente, De la Fuente, M.][[researchgate profile, https://www.researchga...NaN[mariofuente@gmail.com][vineyard management, grapevine, viticulture, ...[[Scopus Author ID, 47960975000]][[Producción Vegetal, Doctor en Viticultura, U...NaN3[Scopus - Elsevier]NaN[gmail.com]1.0
107454170000-0003-4383-4745111JieYangNaNNaNNaNNaN[jyang@esat.kuleuven.be]NaNNaN[[faculty of engineering science, Dr., KU Leuv...NaN0NaNNaN[esat.kuleuven.be]1.0
107467020000-0003-4878-2737111AlekseyAdamtsevichNaNNaN[[Moscow State University of Civil Engineering...NaN[AdamtsevichAO@mgsu.ru][concrete, calorimetry, cement, construction, ...[[Scopus Author ID, 56301531000], [ResearcherI...[[, Engineer (Industrial and Civil Engineering...[[Senior Researcher, Moscow State University o...25[Scopus - Elsevier, ResearcherID]NaN[mgsu.ru]1.0
\n", "

19409 rows × 20 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "47 0000-0001-5017-1295 1 1 \n", "299 0000-0001-5109-3989 1 1 \n", "1296 0000-0001-5476-0126 1 1 \n", "1429 0000-0001-5522-427X 1 1 \n", "1628 0000-0001-5597-3115 1 1 \n", "... ... ... ... \n", "10743658 0000-0003-3740-8352 1 1 \n", "10744876 0000-0003-4192-6451 1 1 \n", "10745274 0000-0003-4333-9728 1 1 \n", "10745417 0000-0003-4383-4745 1 1 \n", "10746702 0000-0003-4878-2737 1 1 \n", "\n", " verified_primary_email given_names family_name \\\n", "47 1 Xinfeng Tang \n", "299 1 colin tysall \n", "1296 1 Aura Windy Hernández Cetina \n", "1429 1 Süleyman Özen \n", "1628 1 Wade Harrison \n", "... ... ... ... \n", "10743658 1 Rui Zhang \n", "10744876 1 Sanjib Raj Pandey \n", "10745274 1 Mario De la Fuente Lloreda \n", "10745417 1 Jie Yang \n", "10746702 1 Aleksey Adamtsevich \n", "\n", " biography \\\n", "47 NaN \n", "299 NaN \n", "1296 NaN \n", "1429 NaN \n", "1628 NaN \n", "... ... \n", "10743658 NaN \n", "10744876 NaN \n", "10745274 Person in charge to coordinate the scientific ... \n", "10745417 NaN \n", "10746702 NaN \n", "\n", " other_names \\\n", "47 NaN \n", "299 NaN \n", "1296 NaN \n", "1429 NaN \n", "1628 NaN \n", "... ... \n", "10743658 NaN \n", "10744876 NaN \n", "10745274 [M.de la Fuente, De la Fuente, M.] \n", "10745417 NaN \n", "10746702 NaN \n", "\n", " urls primary_email \\\n", "47 NaN NaN \n", "299 NaN NaN \n", "1296 NaN NaN \n", "1429 [[Academic CV, https://akademik.yok.gov.tr/Aka... NaN \n", "1628 NaN NaN \n", "... ... ... \n", "10743658 NaN NaN \n", "10744876 [[Personal, https://www.sanjibpandey.wix.com/p... NaN \n", "10745274 [[researchgate profile, https://www.researchga... NaN \n", "10745417 NaN NaN \n", "10746702 [[Moscow State University of Civil Engineering... NaN \n", "\n", " other_emails \\\n", "47 [tang.xinfeng@foxmail.com] \n", "299 [colin.tysall@nhs.net] \n", "1296 [u0902038@unimilitar.edu.co] \n", "1429 [suleyman.ozen@btu.edu.tr] \n", "1628 [wade_harrison@unc.edu] \n", "... ... 
\n", "10743658 [zhang-r15@mails.tsinghua.edu.cn] \n", "10744876 [srpandey@gmail.com] \n", "10745274 [mariofuente@gmail.com] \n", "10745417 [jyang@esat.kuleuven.be] \n", "10746702 [AdamtsevichAO@mgsu.ru] \n", "\n", " keywords \\\n", "47 NaN \n", "299 NaN \n", "1296 NaN \n", "1429 [construction materials, superplasticizers, co... \n", "1628 NaN \n", "... ... \n", "10743658 [Lithium metal batteries, Graphene] \n", "10744876 NaN \n", "10745274 [vineyard management, grapevine, viticulture, ... \n", "10745417 NaN \n", "10746702 [concrete, calorimetry, cement, construction, ... \n", "\n", " external_ids \\\n", "47 [[Scopus Author ID, 56927186900]] \n", "299 NaN \n", "1296 NaN \n", "1429 [[Scopus Author ID, 57188750603]] \n", "1628 NaN \n", "... ... \n", "10743658 [[ResearcherID, B-3843-2015]] \n", "10744876 NaN \n", "10745274 [[Scopus Author ID, 47960975000]] \n", "10745417 NaN \n", "10746702 [[Scopus Author ID, 56301531000], [ResearcherI... \n", "\n", " education \\\n", "47 [[, , University of Hong Kong, Hong Kong, , HK... \n", "299 NaN \n", "1296 [[, Profesional en Relaciones Internacionales ... \n", "1429 [[Civil Engineering, MSc and PhD, Uludağ Unive... \n", "1628 [[, MD, Dartmouth College Geisel School of Med... \n", "... ... \n", "10743658 [[Department of Chemical Engineering, Ph.D. st... \n", "10744876 [[Computing and Information System, PhD, Unive... \n", "10745274 [[Producción Vegetal, Doctor en Viticultura, U... \n", "10745417 [[faculty of engineering science, Dr., KU Leuv... \n", "10746702 [[, Engineer (Industrial and Civil Engineering... \n", "\n", " employment n_works \\\n", "47 NaN 11 \n", "299 [[Associate Mental Health Act Manager, Coventr... 0 \n", "1296 [[Asistente de Investigación, Pontificia Unive... 1 \n", "1429 [[Dr., Bursa Technical University, Bursa, , TR... 7 \n", "1628 [[Clinical Instructor / Research Fellow, Unive... 7 \n", "... ... ... \n", "10743658 NaN 15 \n", "10744876 [[Software Developer & Research Associate, Oxl... 
11 \n", "10745274 NaN 3 \n", "10745417 NaN 0 \n", "10746702 [[Senior Researcher, Moscow State University o... 25 \n", "\n", " works_source \\\n", "47 [Scopus - Elsevier, Xinfeng Tang] \n", "299 NaN \n", "1296 [Aura Windy Hernández Cetina] \n", "1429 [Scopus - Elsevier, Crossref] \n", "1628 [Wade Harrison] \n", "... ... \n", "10743658 [ResearcherID, Crossref] \n", "10744876 [BASE - Bielefeld Academic Search Engine, Dr. ... \n", "10745274 [Scopus - Elsevier] \n", "10745417 NaN \n", "10746702 [Scopus - Elsevier, ResearcherID] \n", "\n", " primary_email_domain other_email_domains n_emails \n", "47 NaN [foxmail.com] 1.0 \n", "299 NaN [nhs.net] 1.0 \n", "1296 NaN [unimilitar.edu.co] 1.0 \n", "1429 NaN [btu.edu.tr] 1.0 \n", "1628 NaN [unc.edu] 1.0 \n", "... ... ... ... \n", "10743658 NaN [mails.tsinghua.edu.cn] 1.0 \n", "10744876 NaN [gmail.com] 1.0 \n", "10745274 NaN [gmail.com] 1.0 \n", "10745417 NaN [esat.kuleuven.be] 1.0 \n", "10746702 NaN [mgsu.ru] 1.0 \n", "\n", "[19409 rows x 20 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Profiles that list other e-mails but have no primary e-mail value.\n", "no_primary_email = df['primary_email'].isna()\n", "has_other_emails = df['other_emails'].notna()\n", "df[no_primary_email & has_other_emails]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## URLs" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "def extract_url_domains(lst):\n", "    \"\"\"Return the registered domain of every URL entry in ``lst``.\n", "\n", "    Each entry is indexable: entry[0] is a string describing the url and\n", "    entry[1] is the url itself. The result has one item per entry, in order.\n", "\n", "    NOTE(review): tldextract documents ``registered_domain`` as an empty string\n", "    when no registrable domain is recognised; such values are kept as they are.\n", "    \"\"\"\n", "    return [tldextract.extract(entry[1]).registered_domain for entry in lst]" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "# Only list values are converted; any other value (e.g. NaN) is passed through unchanged.\n", "df['url_domains'] = df['urls'].apply(lambda x: extract_url_domains(x) if isinstance(x, list) else x)" ] }, { "cell_type": "code", 
"execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...keywordsexternal_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emailsurl_domains
50000-0001-5001-4994111SirenRühsI am an oceanographer studying the interannual...[Siren Ruehs][[ResearchGate, https://www.researchgate.net/p...NaN...NaNNaNNaNNaN11[Siren Rühs]NaNNaNNaN[researchgate.net]
140000-0001-5004-7761111scaffoldinghireNaN[The first feature that you have to check in t...[[scaffolding hire Wellington, https://www.tig...NaN...[scaffolding hire Wellington]NaNNaNNaN0NaNNaNNaNNaN[tigerscaffolds.co.nz]
150000-0001-5005-0557111SenRTNaNNaN[[Research on Psychology, psychiatry, Genetics...NaN...NaNNaNNaNNaN0NaNNaNNaNNaN[corticalbrain.com]
290000-0001-5009-8091111GabrielaMadrugaPossui graduação em Medicina Veterinaria pela ...[Gabriela Morais Madruga][[Curriculo lattes, http://buscatextual.cnpq.b...NaN...[veterinary ophthalmology]NaN[[Surgery in small animal, PhD, Universidade E...[[PhD , University of Minnesota, Minneapolis, ...14[Gabriela Madruga]NaNNaNNaN[cnpq.br]
300000-0001-5010-9539111Sangram KeshariSahuNaN[sk-sahu][[Academic webpage, https://sksahu.net]]NaN...[Computational Genomics and Bioinformatics][[Loop profile, 1098977]][[Centre for Bioinformatics, M.Sc. Bioinformat...[[Bioinformatics Junior Research Fellow, India...3[Crossref Metadata Search, Sangram Keshari Sahu]NaNNaNNaN[sksahu.net]
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "5 0000-0001-5001-4994 1 1 1 \n", "14 0000-0001-5004-7761 1 1 1 \n", "15 0000-0001-5005-0557 1 1 1 \n", "29 0000-0001-5009-8091 1 1 1 \n", "30 0000-0001-5010-9539 1 1 1 \n", "\n", " given_names family_name \\\n", "5 Siren Rühs \n", "14 scaffolding hire \n", "15 Sen RT \n", "29 Gabriela Madruga \n", "30 Sangram Keshari Sahu \n", "\n", " biography \\\n", "5 I am an oceanographer studying the interannual... \n", "14 NaN \n", "15 NaN \n", "29 Possui graduação em Medicina Veterinaria pela ... \n", "30 NaN \n", "\n", " other_names \\\n", "5 [Siren Ruehs] \n", "14 [The first feature that you have to check in t... \n", "15 NaN \n", "29 [Gabriela Morais Madruga] \n", "30 [sk-sahu] \n", "\n", " urls primary_email ... \\\n", "5 [[ResearchGate, https://www.researchgate.net/p... NaN ... \n", "14 [[scaffolding hire Wellington, https://www.tig... NaN ... \n", "15 [[Research on Psychology, psychiatry, Genetics... NaN ... \n", "29 [[Curriculo lattes, http://buscatextual.cnpq.b... NaN ... \n", "30 [[Academic webpage, https://sksahu.net]] NaN ... \n", "\n", " keywords external_ids \\\n", "5 NaN NaN \n", "14 [scaffolding hire Wellington] NaN \n", "15 NaN NaN \n", "29 [veterinary ophthalmology] NaN \n", "30 [Computational Genomics and Bioinformatics] [[Loop profile, 1098977]] \n", "\n", " education \\\n", "5 NaN \n", "14 NaN \n", "15 NaN \n", "29 [[Surgery in small animal, PhD, Universidade E... \n", "30 [[Centre for Bioinformatics, M.Sc. Bioinformat... \n", "\n", " employment n_works \\\n", "5 NaN 11 \n", "14 NaN 0 \n", "15 NaN 0 \n", "29 [[PhD , University of Minnesota, Minneapolis, ... 14 \n", "30 [[Bioinformatics Junior Research Fellow, India... 
3 \n", "\n", " works_source primary_email_domain \\\n", "5 [Siren Rühs] NaN \n", "14 NaN NaN \n", "15 NaN NaN \n", "29 [Gabriela Madruga] NaN \n", "30 [Crossref Metadata Search, Sangram Keshari Sahu] NaN \n", "\n", " other_email_domains n_emails url_domains \n", "5 NaN NaN [researchgate.net] \n", "14 NaN NaN [tigerscaffolds.co.nz] \n", "15 NaN NaN [corticalbrain.com] \n", "29 NaN NaN [cnpq.br] \n", "30 NaN NaN [sksahu.net] \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Peek at the first rows whose `url_domains` value is not missing\n", "df.loc[df['url_domains'].notna()].head()" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "# Length of each `url_domains` list, i.e. the number of URLs on the profile;\n", "# rows with a missing `url_domains` value get NaN\n", "df['n_urls'] = df['url_domains'].str.len()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidn_urls
705770000-0002-1234-835X219.0
51645410000-0001-7478-4539174.0
12152250000-0002-7392-3792169.0
102405100000-0002-6938-9638152.0
40042810000-0002-5710-4041114.0
.........
107470350000-0003-4998-1551NaN
107470360000-0003-4998-4111NaN
107470370000-0003-4998-6045NaN
107470380000-0003-4998-8868NaN
107470390000-0003-4999-7916NaN
\n", "

10744621 rows × 2 columns

\n", "
" ], "text/plain": [ " orcid n_urls\n", "70577 0000-0002-1234-835X 219.0\n", "5164541 0000-0001-7478-4539 174.0\n", "1215225 0000-0002-7392-3792 169.0\n", "10240510 0000-0002-6938-9638 152.0\n", "4004281 0000-0002-5710-4041 114.0\n", "... ... ...\n", "10747035 0000-0003-4998-1551 NaN\n", "10747036 0000-0003-4998-4111 NaN\n", "10747037 0000-0003-4998-6045 NaN\n", "10747038 0000-0003-4998-8868 NaN\n", "10747039 0000-0003-4999-7916 NaN\n", "\n", "[10744621 rows x 2 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values('n_urls', ascending=False)[['orcid', 'n_urls']]" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "0000-0002-1234-835X", "0000-0001-7478-4539", "0000-0002-7392-3792", "0000-0002-6938-9638", "0000-0002-5710-4041", "0000-0003-2450-090X", "0000-0002-3920-7389", "0000-0002-6689-4129", "0000-0002-4621-5571", "0000-0001-9131-1266", "0000-0002-7754-8889", "0000-0002-5250-1144", "0000-0002-9025-8632", "0000-0002-7456-3848", "0000-0003-0176-1293", "0000-0003-0321-7339", "0000-0002-8493-0402", "0000-0002-9965-2425", "0000-0001-8873-6677", "0000-0002-3997-5070", "0000-0002-1856-6905", "0000-0002-4062-3603", "0000-0002-4316-1467", "0000-0002-0752-7513", "0000-0001-5880-7091", "0000-0003-1524-6268", "0000-0003-0594-2462", "0000-0003-2593-7134", "0000-0002-1298-5252", "0000-0003-1761-3842", "0000-0003-2383-8386", "0000-0003-3546-2312", "0000-0002-2886-9248", "0000-0002-1929-6054", "0000-0003-2183-8112", "0000-0003-2407-3557", "0000-0002-4305-4215", "0000-0003-0796-0234", "0000-0001-7133-6896", "0000-0002-9276-6921", "0000-0002-7568-3403", "0000-0003-1484-6958", "0000-0002-4004-6666", "0000-0002-8208-0897", "0000-0002-9071-5450", "0000-0002-8116-9611", "0000-0003-4993-5555", 
"0000-0003-0930-6121", "0000-0003-4948-9268", "0000-0002-8122-879X", "0000-0001-9559-1103", "0000-0002-3277-9659", "0000-0003-2862-6315", "0000-0002-2000-8339", "0000-0001-5300-4601", "0000-0002-6547-0172", "0000-0003-4808-6619", "0000-0002-6254-8683", "0000-0002-0971-9375", "0000-0003-3933-0229", "0000-0002-4659-5391", "0000-0003-0694-1154", "0000-0002-2916-2893", "0000-0003-4501-3756", "0000-0001-6783-2037", "0000-0001-6461-2573", "0000-0001-5549-6822", "0000-0001-8978-4830", "0000-0003-4326-9336", "0000-0001-8096-4333", "0000-0002-8940-3177", "0000-0002-6680-1703", "0000-0002-5946-1595", "0000-0002-8593-9257", "0000-0002-7653-4899", "0000-0002-5196-4905", "0000-0001-8808-4867", "0000-0002-5139-2660", "0000-0001-6921-0426", "0000-0003-1815-1993", "0000-0002-7843-8497", "0000-0003-1675-2840", "0000-0001-8644-2114", "0000-0003-0907-9870", "0000-0001-8986-2528", "0000-0002-5265-6074", "0000-0001-7784-0583", "0000-0001-7550-5802", "0000-0001-6979-4273", "0000-0001-9102-8639", "0000-0002-3334-9386", "0000-0002-0696-8560", "0000-0002-7179-6953", "0000-0002-9771-600X", "0000-0002-8797-6502", "0000-0002-5241-1026", "0000-0001-7193-5039", "0000-0001-9119-5955", "0000-0003-2133-2648", "0000-0001-9026-4795" ], "y": [ 219, 174, 169, 152, 114, 114, 111, 104, 90, 83, 83, 81, 81, 80, 80, 80, 76, 73, 72, 71, 70, 69, 69, 68, 68, 68, 68, 67, 67, 66, 66, 65, 64, 61, 61, 59, 57, 57, 57, 57, 57, 57, 57, 56, 55, 55, 55, 55, 51, 50, 50, 50, 49, 49, 48, 48, 48, 48, 47, 47, 46, 46, 45, 45, 45, 45, 44, 43, 43, 43, 43, 42, 42, 42, 41, 41, 40, 40, 40, 39, 39, 39, 39, 38, 38, 38, 38, 38, 37, 37, 37, 37, 37, 36, 36, 36, 36, 36, 36, 36 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", 
"gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 
0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, 
"hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 100 ORCID with URLs" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = [\n", " go.Bar(\n", " x=df.sort_values(by=['n_urls'], ascending=False)['orcid'][:100],\n", " y=df.sort_values(by=['n_urls'], ascending=False)['n_urls'][:100]\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Top 100 ORCID with URLs',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "grouped_urls = df[['orcid', 'url_domains']]\\\n", " .explode('url_domains')\\\n", " .reset_index(drop=True)\\\n", " .groupby('url_domains')\\\n", " .count()\\\n", " .sort_values('orcid', ascending=False)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "linkedin.com", "researchgate.net", "google.com", "cnpq.br", "academia.edu", "twitter.com", "facebook.com", "publons.com", "wordpress.com", "mendeley.com", "instagram.com", "github.io", "google.com.ua", "blogspot.com", "google.es", "github.com", "helsinki.fi", "unirioja.es", "youtube.com", "wixsite.com", "ku.dk", "scopus.com", "", "weebly.com", "us.es", "kth.se", "cityu.edu.hk", "kcl.ac.uk", "au.dk", "ucl.ac.uk" ], "y": [ 75344, 66267, 43468, 23936, 20786, 18017, 14552, 10339, 8883, 7003, 5532, 5371, 5273, 5158, 5070, 5053, 4682, 4549, 4196, 4053, 3730, 3481, 3332, 3083, 3029, 2944, 2719, 2711, 2640, 2581 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", 
"gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 
0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, 
"hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 URL domains" }, "xaxis": { "range": [ -0.5, 29.5 ], "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = [\n", " go.Bar(\n", " x=grouped_urls[:30].sort_values(by=['orcid'], ascending=False).index,\n", " y=grouped_urls[:30].sort_values(by=['orcid'], ascending=False)['orcid']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Top 30 URL domains',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12), range=TOP_RANGE)\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...external_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
4828620000-0003-4948-9268111GustavoDuperréGustavo Norberto Duperré graduated in Arts and...[Gustavo Norberto Duperré, Duperré, G. N.][[Gis in Cultural Heritage - ICOMOS România, h...gustavo.duperre@usal.edu.ar...[[Scopus Author ID, 57195936346], [ResearcherI...[[Programme in History, History of Art and Ter...[[Titular Professor, Dirección General de Cult...13[Gustavo Duperré, Scopus - Elsevier, Publons, ...usal.edu.arNaNNaN[icomos.ro, unirioja.es, unirioja.es, unc.edu....51.0
5548590000-0002-1929-6054111Franklin AméricoCanaza ChoqueDocente-Investigador Social. Maestrando en Der...[Franklin Américo Canaza-Choque , Franklin A. ...[[Consejo Nacional de Ciencia, Tecnología e In...Leo_123fa@hotmail.com...[[ResearcherID, P-8613-2018], [Loop profile, 8...[[Facultad de Ciencias de la Educación , Maest...[[Investigador Social, Universidad Católica de...38[ResearcherID, BASE - Bielefeld Academic Searc...hotmail.com[gmail.com, gmail.com, hotmail.com, baldwin.ed...5.0[concytec.gob.pe, redalyc.org, redalyc.org, un...61.0
13810920000-0002-9025-8632111buycannabisdispensaryWe procure and deliver premium cannabis strain...[We procure and deliver premium cannabis strai...[[find your cannabis & marijuana dispensary , ...NaN...NaNNaNNaN10[goowonderland dispensary]NaNNaNNaN[goowonderland.com, goowonderland.com, goowond...81.0
26793530000-0003-2407-3557111AbdulAzizAbdul Aziz was born on May 25, 1973, in Brebes...[Abdul Aziz, Aziz, Abdul, Aziz, A., Aziz, Abd,...[[Google Scholar, https://scholar.google.com/c...NaN...NaN[[Ilmu Ekonomi, Dr, Universitas Borobudur, Jak...[[Assisten Professor/Dr, Institut Agama Islam ...72[BASE - Bielefeld Academic Search Engine, Abdu...NaNNaNNaN[google.com, syekhnurjati.ac.id, orcid.org, bl...59.0
33544300000-0002-3920-7389111А.ГусевSurname, Name Gusev Alexander LeonidovichDate...[Alexander L. Gusev , Alexander Leonidovich Gu...[[A.L. Gusev Alternative Energy and Ecology, ...NaN...[[ResearcherID, F-8048-2014], [Scopus Author I...[[Chemical technology and cryogenic-vacuum tec...[[General Director, Scientific Technical Centr...472[Publons, DataCite, Scopus - Elsevier, A.L. Gu...NaNNaNNaN[youtube.com, isjaee.com, researchgate.net, re...111.0
40042810000-0002-5710-4041111RyszardRomaniukProfessor of Electronics and Communications En...[R.Romaniuk, R.S.Romaniuk, Ryszard Romaniuk, R...[[Scholar Google, http://scholar.google.pl/cit...rrom@ise.pw.edu.pl...[[ISNI, 0000000071432485], [ResearcherID, B-91...[[Faculty of Electronics and Information Techn...[[Professor, Institute Director, Politechnika ...5008[INSPIRE-HEP, ResearcherID, ISNI2ORCID search ...ise.pw.edu.pl[ise.pw.edu.pl, elka.pw.edu.pl, cern.ch]3.0[google.pl, publons.com, scopus.com, mendeley....114.0
40224800000-0003-2450-090X111EduardBabulakProfessor Eduard Babulak is accomplished inter...[Professor Eduard Babulak][[Honorary Chair, Chief Mentor & Senior Adviso...NaN...[[Scopus Author ID, 6506867432], [ResearcherID...[[Information Technology, Doctor Habilitated (...[[Consultant, Horizon 2020 Framework Programme...274[The Lens, BASE - Bielefeld Academic Search En...NaNNaNNaN[worldassessmentcouncil.org, spseke.sk, bcs.or...114.0
63353570000-0003-2593-7134111AanJaelaniAll my papers can be downloaded from portal:Re...[Jaelani, A., Jaelani, Aan][[Microsoft Academic Research, https://academi...aan_jaelani@syekhnurjati.ac.id...[[Scopus Author ID, 57195963463], [Loop profil...[[Post Graduate, S3/Dr, Universitas Islam Nege...[[Dr, Institut Agama Islam Negeri Syekh Nurjat...79[Publons, Aan Jaelani, Scopus - Elsevier, Dime...syekhnurjati.ac.id[gmail.com]1.0[microsoft.com, twitter.com, academia.edu, aca...67.0
64898380000-0002-9965-2425111JaroslawSpychalaJaroslaw Spychala has received a doctoral degr...[Jaroslaw Jozef Spychala][[RESUME, http://www.biowebspin.com/wp-content...NaN...[[Scopus Author ID, 7006745874]][[Department of Chemistry, Postdoctoral Associ...[[Assistant Professor, Adam Mickiewicz Univers...29[Scopus - Elsevier]NaNNaNNaN[biowebspin.com, biowebspin.com, google.com, l...73.0
75705840000-0003-2183-8112111Pelayo MunhozOleaPós-Doutorado em Gestão Ambiental pela Univers...[ Munhoz, Pelayo Olea, Olea, Pelayo, Olea, P...[[Currículo Lattes, http://lattes.cnpq.br/6209...NaN...[[Scopus Author ID, 55175503300], [ResearcherI...[[, Postdoctoral in Environmental Sustainabili...[[Professor, Universidade Federal do Rio Grand...1105[The Lens, Pelayo Munhoz Olea, Dimensions, BAS...NaNNaNNaN[cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c...61.0
102405100000-0002-6938-9638111AdolfoCatral SanabriaMy education is in computer science, mathemati...NaN[[ResearchGate Adolfo Catral , https://www.res...NaN...[[Loop profile, 747193]][[Education, Capacitación para la enseñanza en...NaN2023[BASE - Bielefeld Academic Search Engine, Data...NaNNaNNaN[researchgate.net, youtube.com, linkedin.com, ...152.0
104483040000-0002-4062-3603111JUAN DE DIOSBELTRÁN MANCILLAJUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut...[Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD...[[01.- Juan de Dios Beltrán Mancilla. Teoría O...NaN...NaN[[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR...[[INSPECTOR GENERAL JORNADA VESPERTINA // De 2...11[JUAN DE DIOS BELTR´´ÁN MANCILLA]NaNNaNNaN[yumpu.com, ijopm.org, google.com, blogspot.co...69.0
106638940000-0002-3997-5070111Dr. ParameshachariB DDr. Parameshachari B DACM Distinguished Speake...[Dr. PARAMESHACHARI B D][[GSSSIETW,MYSURU, http://geethashishu.in/], [...NaN...[[ResearcherID, F-7045-2018], [Scopus Author I...[[Electronics and Communication Engineering, P...[[ACM Distinguished Speaker (Volunteer), Assoc...93[Publons, Multidisciplinary Digital Publishing...NaNNaNNaN[geethashishu.in, geethashishu.in, acm.org, go...71.0
\n", "

13 rows × 22 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "482862 0000-0003-4948-9268 1 1 \n", "554859 0000-0002-1929-6054 1 1 \n", "1381092 0000-0002-9025-8632 1 1 \n", "2679353 0000-0003-2407-3557 1 1 \n", "3354430 0000-0002-3920-7389 1 1 \n", "4004281 0000-0002-5710-4041 1 1 \n", "4022480 0000-0003-2450-090X 1 1 \n", "6335357 0000-0003-2593-7134 1 1 \n", "6489838 0000-0002-9965-2425 1 1 \n", "7570584 0000-0003-2183-8112 1 1 \n", "10240510 0000-0002-6938-9638 1 1 \n", "10448304 0000-0002-4062-3603 1 1 \n", "10663894 0000-0002-3997-5070 1 1 \n", "\n", " verified_primary_email given_names family_name \\\n", "482862 1 Gustavo Duperré \n", "554859 1 Franklin Américo Canaza Choque \n", "1381092 1 buycannabis dispensary \n", "2679353 1 Abdul Aziz \n", "3354430 1 А. Гусев \n", "4004281 1 Ryszard Romaniuk \n", "4022480 1 Eduard Babulak \n", "6335357 1 Aan Jaelani \n", "6489838 1 Jaroslaw Spychala \n", "7570584 1 Pelayo Munhoz Olea \n", "10240510 1 Adolfo Catral Sanabria \n", "10448304 1 JUAN DE DIOS BELTRÁN MANCILLA \n", "10663894 1 Dr. Parameshachari B D \n", "\n", " biography \\\n", "482862 Gustavo Norberto Duperré graduated in Arts and... \n", "554859 Docente-Investigador Social. Maestrando en Der... \n", "1381092 We procure and deliver premium cannabis strain... \n", "2679353 Abdul Aziz was born on May 25, 1973, in Brebes... \n", "3354430 Surname, Name Gusev Alexander LeonidovichDate... \n", "4004281 Professor of Electronics and Communications En... \n", "4022480 Professor Eduard Babulak is accomplished inter... \n", "6335357 All my papers can be downloaded from portal:Re... \n", "6489838 Jaroslaw Spychala has received a doctoral degr... \n", "7570584 Pós-Doutorado em Gestão Ambiental pela Univers... \n", "10240510 My education is in computer science, mathemati... \n", "10448304 JUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut... \n", "10663894 Dr. Parameshachari B DACM Distinguished Speake... 
\n", "\n", " other_names \\\n", "482862 [Gustavo Norberto Duperré, Duperré, G. N.] \n", "554859 [Franklin Américo Canaza-Choque , Franklin A. ... \n", "1381092 [We procure and deliver premium cannabis strai... \n", "2679353 [Abdul Aziz, Aziz, Abdul, Aziz, A., Aziz, Abd,... \n", "3354430 [Alexander L. Gusev , Alexander Leonidovich Gu... \n", "4004281 [R.Romaniuk, R.S.Romaniuk, Ryszard Romaniuk, R... \n", "4022480 [Professor Eduard Babulak] \n", "6335357 [Jaelani, A., Jaelani, Aan] \n", "6489838 [Jaroslaw Jozef Spychala] \n", "7570584 [ Munhoz, Pelayo Olea, Olea, Pelayo, Olea, P... \n", "10240510 NaN \n", "10448304 [Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD... \n", "10663894 [Dr. PARAMESHACHARI B D] \n", "\n", " urls \\\n", "482862 [[Gis in Cultural Heritage - ICOMOS România, h... \n", "554859 [[Consejo Nacional de Ciencia, Tecnología e In... \n", "1381092 [[find your cannabis & marijuana dispensary , ... \n", "2679353 [[Google Scholar, https://scholar.google.com/c... \n", "3354430 [[A.L. Gusev Alternative Energy and Ecology, ... \n", "4004281 [[Scholar Google, http://scholar.google.pl/cit... \n", "4022480 [[Honorary Chair, Chief Mentor & Senior Adviso... \n", "6335357 [[Microsoft Academic Research, https://academi... \n", "6489838 [[RESUME, http://www.biowebspin.com/wp-content... \n", "7570584 [[Currículo Lattes, http://lattes.cnpq.br/6209... \n", "10240510 [[ResearchGate Adolfo Catral , https://www.res... \n", "10448304 [[01.- Juan de Dios Beltrán Mancilla. Teoría O... \n", "10663894 [[GSSSIETW,MYSURU, http://geethashishu.in/], [... \n", "\n", " primary_email ... \\\n", "482862 gustavo.duperre@usal.edu.ar ... \n", "554859 Leo_123fa@hotmail.com ... \n", "1381092 NaN ... \n", "2679353 NaN ... \n", "3354430 NaN ... \n", "4004281 rrom@ise.pw.edu.pl ... \n", "4022480 NaN ... \n", "6335357 aan_jaelani@syekhnurjati.ac.id ... \n", "6489838 NaN ... \n", "7570584 NaN ... \n", "10240510 NaN ... \n", "10448304 NaN ... \n", "10663894 NaN ... 
\n", "\n", " external_ids \\\n", "482862 [[Scopus Author ID, 57195936346], [ResearcherI... \n", "554859 [[ResearcherID, P-8613-2018], [Loop profile, 8... \n", "1381092 NaN \n", "2679353 NaN \n", "3354430 [[ResearcherID, F-8048-2014], [Scopus Author I... \n", "4004281 [[ISNI, 0000000071432485], [ResearcherID, B-91... \n", "4022480 [[Scopus Author ID, 6506867432], [ResearcherID... \n", "6335357 [[Scopus Author ID, 57195963463], [Loop profil... \n", "6489838 [[Scopus Author ID, 7006745874]] \n", "7570584 [[Scopus Author ID, 55175503300], [ResearcherI... \n", "10240510 [[Loop profile, 747193]] \n", "10448304 NaN \n", "10663894 [[ResearcherID, F-7045-2018], [Scopus Author I... \n", "\n", " education \\\n", "482862 [[Programme in History, History of Art and Ter... \n", "554859 [[Facultad de Ciencias de la Educación , Maest... \n", "1381092 NaN \n", "2679353 [[Ilmu Ekonomi, Dr, Universitas Borobudur, Jak... \n", "3354430 [[Chemical technology and cryogenic-vacuum tec... \n", "4004281 [[Faculty of Electronics and Information Techn... \n", "4022480 [[Information Technology, Doctor Habilitated (... \n", "6335357 [[Post Graduate, S3/Dr, Universitas Islam Nege... \n", "6489838 [[Department of Chemistry, Postdoctoral Associ... \n", "7570584 [[, Postdoctoral in Environmental Sustainabili... \n", "10240510 [[Education, Capacitación para la enseñanza en... \n", "10448304 [[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR... \n", "10663894 [[Electronics and Communication Engineering, P... \n", "\n", " employment n_works \\\n", "482862 [[Titular Professor, Dirección General de Cult... 13 \n", "554859 [[Investigador Social, Universidad Católica de... 38 \n", "1381092 NaN 10 \n", "2679353 [[Assisten Professor/Dr, Institut Agama Islam ... 72 \n", "3354430 [[General Director, Scientific Technical Centr... 472 \n", "4004281 [[Professor, Institute Director, Politechnika ... 5008 \n", "4022480 [[Consultant, Horizon 2020 Framework Programme... 
274 \n", "6335357 [[Dr, Institut Agama Islam Negeri Syekh Nurjat... 79 \n", "6489838 [[Assistant Professor, Adam Mickiewicz Univers... 29 \n", "7570584 [[Professor, Universidade Federal do Rio Grand... 1105 \n", "10240510 NaN 2023 \n", "10448304 [[INSPECTOR GENERAL JORNADA VESPERTINA // De 2... 11 \n", "10663894 [[ACM Distinguished Speaker (Volunteer), Assoc... 93 \n", "\n", " works_source \\\n", "482862 [Gustavo Duperré, Scopus - Elsevier, Publons, ... \n", "554859 [ResearcherID, BASE - Bielefeld Academic Searc... \n", "1381092 [goowonderland dispensary] \n", "2679353 [BASE - Bielefeld Academic Search Engine, Abdu... \n", "3354430 [Publons, DataCite, Scopus - Elsevier, A.L. Gu... \n", "4004281 [INSPIRE-HEP, ResearcherID, ISNI2ORCID search ... \n", "4022480 [The Lens, BASE - Bielefeld Academic Search En... \n", "6335357 [Publons, Aan Jaelani, Scopus - Elsevier, Dime... \n", "6489838 [Scopus - Elsevier] \n", "7570584 [The Lens, Pelayo Munhoz Olea, Dimensions, BAS... \n", "10240510 [BASE - Bielefeld Academic Search Engine, Data... \n", "10448304 [JUAN DE DIOS BELTR´´ÁN MANCILLA] \n", "10663894 [Publons, Multidisciplinary Digital Publishing... \n", "\n", " primary_email_domain \\\n", "482862 usal.edu.ar \n", "554859 hotmail.com \n", "1381092 NaN \n", "2679353 NaN \n", "3354430 NaN \n", "4004281 ise.pw.edu.pl \n", "4022480 NaN \n", "6335357 syekhnurjati.ac.id \n", "6489838 NaN \n", "7570584 NaN \n", "10240510 NaN \n", "10448304 NaN \n", "10663894 NaN \n", "\n", " other_email_domains n_emails \\\n", "482862 NaN NaN \n", "554859 [gmail.com, gmail.com, hotmail.com, baldwin.ed... 5.0 \n", "1381092 NaN NaN \n", "2679353 NaN NaN \n", "3354430 NaN NaN \n", "4004281 [ise.pw.edu.pl, elka.pw.edu.pl, cern.ch] 3.0 \n", "4022480 NaN NaN \n", "6335357 [gmail.com] 1.0 \n", "6489838 NaN NaN \n", "7570584 NaN NaN \n", "10240510 NaN NaN \n", "10448304 NaN NaN \n", "10663894 NaN NaN \n", "\n", " url_domains n_urls \n", "482862 [icomos.ro, unirioja.es, unirioja.es, unc.edu.... 
51.0 \n", "554859 [concytec.gob.pe, redalyc.org, redalyc.org, un... 61.0 \n", "1381092 [goowonderland.com, goowonderland.com, goowond... 81.0 \n", "2679353 [google.com, syekhnurjati.ac.id, orcid.org, bl... 59.0 \n", "3354430 [youtube.com, isjaee.com, researchgate.net, re... 111.0 \n", "4004281 [google.pl, publons.com, scopus.com, mendeley.... 114.0 \n", "4022480 [worldassessmentcouncil.org, spseke.sk, bcs.or... 114.0 \n", "6335357 [microsoft.com, twitter.com, academia.edu, aca... 67.0 \n", "6489838 [biowebspin.com, biowebspin.com, google.com, l... 73.0 \n", "7570584 [cnpq.br, cnpq.br, cnpq.br, cnpq.br, publons.c... 61.0 \n", "10240510 [researchgate.net, youtube.com, linkedin.com, ... 152.0 \n", "10448304 [yumpu.com, ijopm.org, google.com, blogspot.co... 69.0 \n", "10663894 [geethashishu.in, geethashishu.in, acm.org, go... 71.0 \n", "\n", "[13 rows x 22 columns]" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df['url_domains'].str.len() > 50) & (df['n_works'] > 0)]" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...external_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
455660000-0003-1948-3180111MarkKatzMark N. Katz is a professor of government and ...NaN[[Adjusting to Change: American Foreign Policy...NaN...[[Scopus Author ID, 25649901800]][[Political Science, Ph.D., Massachusetts Inst...[[Professor of Government and Politics, George...58[Scopus - Elsevier]NaNNaNNaN[wordpress.com, marknkatz.com, gmu.edu, atlant...16.0
726740000-0002-2000-8339111Phòng khám tư nhân Hà NộiNaNNaNNaN[[Sức khỏe, https://onhealth.vn/], [Khám phụ k...NaN...NaNNaNNaN4[Phòng khám tư nhân Hà Nội]NaNNaNNaN[onhealth.vn, onhealth.vn, onhealth.vn, onheal...49.0
1728200000-0001-9293-2224111Juan CarlosGarcia HoyosMy name is Juan Carlos García Hoyos. I was bor...[Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /...[[Air Force Office of Scientific Research (WRI...NaN...NaN[[Faculty of Philosophy, History - Ph.D., Char...[[responsible for the Project Service Level Ag...20[Juan Carlos Garcia Hoyos]NaNNaNNaN[af.mil, gst.com, govtribe.com, sbir.gov, open...28.0
2095050000-0003-3045-0056111AnandaMajumdarI am Ananda Majumdar, Child Care Educator at B...NaN[[Migration Scholar and Ananda , https://grfdt...NaN...NaN[[Education , B.Ed. After Degree , University ...[[General Coordinator- University of Alberta C...43[Ananda Majumdar]NaNNaNNaN[grfdt.com, linkedin.com, academia.edu, resear...24.0
2598770000-0003-1815-5732111JAS(Jurnal Akuntansi Syariah)JAS (Jurnal Akuntansi Syariah) published in pr...NaN[[Website, https://ejournal.stiesyariahbengkal...NaN...NaNNaNNaN67[JAS (Jurnal Akuntansi Syariah)]NaNNaNNaN[stiesyariahbengkalis.ac.id, lipi.go.id, cross...17.0
..................................................................
104948200000-0002-1324-7171111Vanesa NataliaRodriguezNombre y Apellido: Vanesa Natalia Rodriguez. ...[Vanesa Rodriguez, Vanesa N. Rodriguez][[De rufianes y franchutas Representaciones y ...NaN...NaN[[, Maestría en Ciencias Sociales con Mención ...[[Profesora, Universidad Nacional de La Matanz...7[Vanesa Natalia Rodriguez]NaNNaNNaN[unlam.edu.ar, unirioja.es, amazon.fr, abebook...19.0
104958060000-0002-1700-8311111Fix-ITRiteNaN[Best Heating & Plumbing Company][[Website, https://fix-itrite.com], [Muckrack,...NaN...NaNNaNNaN1[Fix-It Rite]NaNNaNNaN[fix-itrite.com, muckrack.com, tumblr.com, dri...11.0
106335450000-0003-2676-4431111BennySoewandiNaN[Benny Soewandi][[Conservation Efforts as a Result of Theoreti...NaN...NaNNaN[[Membership, Paguyuban Pelestarian Budaya Ban...2[Benny Soewandi]NaNNaNNaN[wordpress.com, wordpress.com, linkedin.com, f...11.0
106482410000-0001-8157-0600111BijanYavarSenior Research Assistant and Phd Student in O...[B. Yavar, Yavar Bijan][[Web of Science (Pub) Researcher ID: A-3544-2...NaN...[[Scopus Author ID, 56556873600]]NaNNaN6[Scopus - Elsevier]NaNNaNNaN[publons.com, articulate.com, zenodo.org, orci...15.0
106796990000-0002-9874-1450111FENGZHIWUNaNNaN[[A Systematic Study on the Dynamic Softening ...NaN...NaNNaNNaN3[FENGZHI WU]NaNNaNNaN[springer.com, sciencedirect.com, sciencedirec...23.0
\n", "

139 rows × 22 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email \\\n", "45566 0000-0003-1948-3180 1 1 \n", "72674 0000-0002-2000-8339 1 1 \n", "172820 0000-0001-9293-2224 1 1 \n", "209505 0000-0003-3045-0056 1 1 \n", "259877 0000-0003-1815-5732 1 1 \n", "... ... ... ... \n", "10494820 0000-0002-1324-7171 1 1 \n", "10495806 0000-0002-1700-8311 1 1 \n", "10633545 0000-0003-2676-4431 1 1 \n", "10648241 0000-0001-8157-0600 1 1 \n", "10679699 0000-0002-9874-1450 1 1 \n", "\n", " verified_primary_email given_names \\\n", "45566 1 Mark \n", "72674 1 Phòng khám tư nhân Hà Nội \n", "172820 1 Juan Carlos \n", "209505 1 Ananda \n", "259877 1 JAS \n", "... ... ... \n", "10494820 1 Vanesa Natalia \n", "10495806 1 Fix-IT \n", "10633545 1 Benny \n", "10648241 1 Bijan \n", "10679699 1 FENGZHI \n", "\n", " family_name \\\n", "45566 Katz \n", "72674 NaN \n", "172820 Garcia Hoyos \n", "209505 Majumdar \n", "259877 (Jurnal Akuntansi Syariah) \n", "... ... \n", "10494820 Rodriguez \n", "10495806 Rite \n", "10633545 Soewandi \n", "10648241 Yavar \n", "10679699 WU \n", "\n", " biography \\\n", "45566 Mark N. Katz is a professor of government and ... \n", "72674 NaN \n", "172820 My name is Juan Carlos García Hoyos. I was bor... \n", "209505 I am Ananda Majumdar, Child Care Educator at B... \n", "259877 JAS (Jurnal Akuntansi Syariah) published in pr... \n", "... ... \n", "10494820 Nombre y Apellido: Vanesa Natalia Rodriguez. ... \n", "10495806 NaN \n", "10633545 NaN \n", "10648241 Senior Research Assistant and Phd Student in O... \n", "10679699 NaN \n", "\n", " other_names \\\n", "45566 NaN \n", "72674 NaN \n", "172820 [Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /... \n", "209505 NaN \n", "259877 NaN \n", "... ... \n", "10494820 [Vanesa Rodriguez, Vanesa N. Rodriguez] \n", "10495806 [Best Heating & Plumbing Company] \n", "10633545 [Benny Soewandi] \n", "10648241 [B. Yavar, Yavar Bijan] \n", "10679699 NaN \n", "\n", " urls primary_email \\\n", "45566 [[Adjusting to Change: American Foreign Policy... 
NaN \n", "72674 [[Sức khỏe, https://onhealth.vn/], [Khám phụ k... NaN \n", "172820 [[Air Force Office of Scientific Research (WRI... NaN \n", "209505 [[Migration Scholar and Ananda , https://grfdt... NaN \n", "259877 [[Website, https://ejournal.stiesyariahbengkal... NaN \n", "... ... ... \n", "10494820 [[De rufianes y franchutas Representaciones y ... NaN \n", "10495806 [[Website, https://fix-itrite.com], [Muckrack,... NaN \n", "10633545 [[Conservation Efforts as a Result of Theoreti... NaN \n", "10648241 [[Web of Science (Pub) Researcher ID: A-3544-2... NaN \n", "10679699 [[A Systematic Study on the Dynamic Softening ... NaN \n", "\n", " ... external_ids \\\n", "45566 ... [[Scopus Author ID, 25649901800]] \n", "72674 ... NaN \n", "172820 ... NaN \n", "209505 ... NaN \n", "259877 ... NaN \n", "... ... ... \n", "10494820 ... NaN \n", "10495806 ... NaN \n", "10633545 ... NaN \n", "10648241 ... [[Scopus Author ID, 56556873600]] \n", "10679699 ... NaN \n", "\n", " education \\\n", "45566 [[Political Science, Ph.D., Massachusetts Inst... \n", "72674 NaN \n", "172820 [[Faculty of Philosophy, History - Ph.D., Char... \n", "209505 [[Education , B.Ed. After Degree , University ... \n", "259877 NaN \n", "... ... \n", "10494820 [[, Maestría en Ciencias Sociales con Mención ... \n", "10495806 NaN \n", "10633545 NaN \n", "10648241 NaN \n", "10679699 NaN \n", "\n", " employment n_works \\\n", "45566 [[Professor of Government and Politics, George... 58 \n", "72674 NaN 4 \n", "172820 [[responsible for the Project Service Level Ag... 20 \n", "209505 [[General Coordinator- University of Alberta C... 43 \n", "259877 NaN 67 \n", "... ... ... \n", "10494820 [[Profesora, Universidad Nacional de La Matanz... 7 \n", "10495806 NaN 1 \n", "10633545 [[Membership, Paguyuban Pelestarian Budaya Ban... 
2 \n", "10648241 NaN 6 \n", "10679699 NaN 3 \n", "\n", " works_source primary_email_domain \\\n", "45566 [Scopus - Elsevier] NaN \n", "72674 [Phòng khám tư nhân Hà Nội] NaN \n", "172820 [Juan Carlos Garcia Hoyos] NaN \n", "209505 [Ananda Majumdar] NaN \n", "259877 [JAS (Jurnal Akuntansi Syariah)] NaN \n", "... ... ... \n", "10494820 [Vanesa Natalia Rodriguez] NaN \n", "10495806 [Fix-It Rite] NaN \n", "10633545 [Benny Soewandi] NaN \n", "10648241 [Scopus - Elsevier] NaN \n", "10679699 [FENGZHI WU] NaN \n", "\n", " other_email_domains n_emails \\\n", "45566 NaN NaN \n", "72674 NaN NaN \n", "172820 NaN NaN \n", "209505 NaN NaN \n", "259877 NaN NaN \n", "... ... ... \n", "10494820 NaN NaN \n", "10495806 NaN NaN \n", "10633545 NaN NaN \n", "10648241 NaN NaN \n", "10679699 NaN NaN \n", "\n", " url_domains n_urls \n", "45566 [wordpress.com, marknkatz.com, gmu.edu, atlant... 16.0 \n", "72674 [onhealth.vn, onhealth.vn, onhealth.vn, onheal... 49.0 \n", "172820 [af.mil, gst.com, govtribe.com, sbir.gov, open... 28.0 \n", "209505 [grfdt.com, linkedin.com, academia.edu, resear... 24.0 \n", "259877 [stiesyariahbengkalis.ac.id, lipi.go.id, cross... 17.0 \n", "... ... ... \n", "10494820 [unlam.edu.ar, unirioja.es, amazon.fr, abebook... 19.0 \n", "10495806 [fix-itrite.com, muckrack.com, tumblr.com, dri... 11.0 \n", "10633545 [wordpress.com, wordpress.com, linkedin.com, f... 11.0 \n", "10648241 [publons.com, articulate.com, zenodo.org, orci... 15.0 \n", "10679699 [springer.com, sciencedirect.com, sciencedirec... 23.0 \n", "\n", "[139 rows x 22 columns]" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)]" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...external_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
00000-0003-1948-3180111MarkKatzMark N. Katz is a professor of government and ...NaN[[Adjusting to Change: American Foreign Policy...NaN...[[Scopus Author ID, 25649901800]][[Political Science, Ph.D., Massachusetts Inst...[[Professor of Government and Politics, George...58Scopus - ElsevierNaNNaNNaN[wordpress.com, marknkatz.com, gmu.edu, atlant...16.0
10000-0002-2000-8339111Phòng khám tư nhân Hà NộiNaNNaNNaN[[Sức khỏe, https://onhealth.vn/], [Khám phụ k...NaN...NaNNaNNaN4Phòng khám tư nhân Hà NộiNaNNaNNaN[onhealth.vn, onhealth.vn, onhealth.vn, onheal...49.0
20000-0001-9293-2224111Juan CarlosGarcia HoyosMy name is Juan Carlos García Hoyos. I was bor...[Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /...[[Air Force Office of Scientific Research (WRI...NaN...NaN[[Faculty of Philosophy, History - Ph.D., Char...[[responsible for the Project Service Level Ag...20Juan Carlos Garcia HoyosNaNNaNNaN[af.mil, gst.com, govtribe.com, sbir.gov, open...28.0
30000-0003-3045-0056111AnandaMajumdarI am Ananda Majumdar, Child Care Educator at B...NaN[[Migration Scholar and Ananda , https://grfdt...NaN...NaN[[Education , B.Ed. After Degree , University ...[[General Coordinator- University of Alberta C...43Ananda MajumdarNaNNaNNaN[grfdt.com, linkedin.com, academia.edu, resear...24.0
40000-0003-1815-5732111JAS(Jurnal Akuntansi Syariah)JAS (Jurnal Akuntansi Syariah) published in pr...NaN[[Website, https://ejournal.stiesyariahbengkal...NaN...NaNNaNNaN67JAS (Jurnal Akuntansi Syariah)NaNNaNNaN[stiesyariahbengkalis.ac.id, lipi.go.id, cross...17.0
..................................................................
1340000-0002-1324-7171111Vanesa NataliaRodriguezNombre y Apellido: Vanesa Natalia Rodriguez. ...[Vanesa Rodriguez, Vanesa N. Rodriguez][[De rufianes y franchutas Representaciones y ...NaN...NaN[[, Maestría en Ciencias Sociales con Mención ...[[Profesora, Universidad Nacional de La Matanz...7Vanesa Natalia RodriguezNaNNaNNaN[unlam.edu.ar, unirioja.es, amazon.fr, abebook...19.0
1350000-0002-1700-8311111Fix-ITRiteNaN[Best Heating & Plumbing Company][[Website, https://fix-itrite.com], [Muckrack,...NaN...NaNNaNNaN1Fix-It RiteNaNNaNNaN[fix-itrite.com, muckrack.com, tumblr.com, dri...11.0
1360000-0003-2676-4431111BennySoewandiNaN[Benny Soewandi][[Conservation Efforts as a Result of Theoreti...NaN...NaNNaN[[Membership, Paguyuban Pelestarian Budaya Ban...2Benny SoewandiNaNNaNNaN[wordpress.com, wordpress.com, linkedin.com, f...11.0
1370000-0001-8157-0600111BijanYavarSenior Research Assistant and Phd Student in O...[B. Yavar, Yavar Bijan][[Web of Science (Pub) Researcher ID: A-3544-2...NaN...[[Scopus Author ID, 56556873600]]NaNNaN6Scopus - ElsevierNaNNaNNaN[publons.com, articulate.com, zenodo.org, orci...15.0
1380000-0002-9874-1450111FENGZHIWUNaNNaN[[A Systematic Study on the Dynamic Softening ...NaN...NaNNaNNaN3FENGZHI WUNaNNaNNaN[springer.com, sciencedirect.com, sciencedirec...23.0
\n", "

139 rows × 22 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "0 0000-0003-1948-3180 1 1 1 \n", "1 0000-0002-2000-8339 1 1 1 \n", "2 0000-0001-9293-2224 1 1 1 \n", "3 0000-0003-3045-0056 1 1 1 \n", "4 0000-0003-1815-5732 1 1 1 \n", ".. ... ... ... ... \n", "134 0000-0002-1324-7171 1 1 1 \n", "135 0000-0002-1700-8311 1 1 1 \n", "136 0000-0003-2676-4431 1 1 1 \n", "137 0000-0001-8157-0600 1 1 1 \n", "138 0000-0002-9874-1450 1 1 1 \n", "\n", " given_names family_name \\\n", "0 Mark Katz \n", "1 Phòng khám tư nhân Hà Nội NaN \n", "2 Juan Carlos Garcia Hoyos \n", "3 Ananda Majumdar \n", "4 JAS (Jurnal Akuntansi Syariah) \n", ".. ... ... \n", "134 Vanesa Natalia Rodriguez \n", "135 Fix-IT Rite \n", "136 Benny Soewandi \n", "137 Bijan Yavar \n", "138 FENGZHI WU \n", "\n", " biography \\\n", "0 Mark N. Katz is a professor of government and ... \n", "1 NaN \n", "2 My name is Juan Carlos García Hoyos. I was bor... \n", "3 I am Ananda Majumdar, Child Care Educator at B... \n", "4 JAS (Jurnal Akuntansi Syariah) published in pr... \n", ".. ... \n", "134 Nombre y Apellido: Vanesa Natalia Rodriguez. ... \n", "135 NaN \n", "136 NaN \n", "137 Senior Research Assistant and Phd Student in O... \n", "138 NaN \n", "\n", " other_names \\\n", "0 NaN \n", "1 NaN \n", "2 [Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /... \n", "3 NaN \n", "4 NaN \n", ".. ... \n", "134 [Vanesa Rodriguez, Vanesa N. Rodriguez] \n", "135 [Best Heating & Plumbing Company] \n", "136 [Benny Soewandi] \n", "137 [B. Yavar, Yavar Bijan] \n", "138 NaN \n", "\n", " urls primary_email ... \\\n", "0 [[Adjusting to Change: American Foreign Policy... NaN ... \n", "1 [[Sức khỏe, https://onhealth.vn/], [Khám phụ k... NaN ... \n", "2 [[Air Force Office of Scientific Research (WRI... NaN ... \n", "3 [[Migration Scholar and Ananda , https://grfdt... NaN ... \n", "4 [[Website, https://ejournal.stiesyariahbengkal... NaN ... \n", ".. ... ... ... \n", "134 [[De rufianes y franchutas Representaciones y ... NaN ... 
\n", "135 [[Website, https://fix-itrite.com], [Muckrack,... NaN ... \n", "136 [[Conservation Efforts as a Result of Theoreti... NaN ... \n", "137 [[Web of Science (Pub) Researcher ID: A-3544-2... NaN ... \n", "138 [[A Systematic Study on the Dynamic Softening ... NaN ... \n", "\n", " external_ids \\\n", "0 [[Scopus Author ID, 25649901800]] \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", ".. ... \n", "134 NaN \n", "135 NaN \n", "136 NaN \n", "137 [[Scopus Author ID, 56556873600]] \n", "138 NaN \n", "\n", " education \\\n", "0 [[Political Science, Ph.D., Massachusetts Inst... \n", "1 NaN \n", "2 [[Faculty of Philosophy, History - Ph.D., Char... \n", "3 [[Education , B.Ed. After Degree , University ... \n", "4 NaN \n", ".. ... \n", "134 [[, Maestría en Ciencias Sociales con Mención ... \n", "135 NaN \n", "136 NaN \n", "137 NaN \n", "138 NaN \n", "\n", " employment n_works \\\n", "0 [[Professor of Government and Politics, George... 58 \n", "1 NaN 4 \n", "2 [[responsible for the Project Service Level Ag... 20 \n", "3 [[General Coordinator- University of Alberta C... 43 \n", "4 NaN 67 \n", ".. ... ... \n", "134 [[Profesora, Universidad Nacional de La Matanz... 7 \n", "135 NaN 1 \n", "136 [[Membership, Paguyuban Pelestarian Budaya Ban... 2 \n", "137 NaN 6 \n", "138 NaN 3 \n", "\n", " works_source primary_email_domain other_email_domains \\\n", "0 Scopus - Elsevier NaN NaN \n", "1 Phòng khám tư nhân Hà Nội NaN NaN \n", "2 Juan Carlos Garcia Hoyos NaN NaN \n", "3 Ananda Majumdar NaN NaN \n", "4 JAS (Jurnal Akuntansi Syariah) NaN NaN \n", ".. ... ... ... \n", "134 Vanesa Natalia Rodriguez NaN NaN \n", "135 Fix-It Rite NaN NaN \n", "136 Benny Soewandi NaN NaN \n", "137 Scopus - Elsevier NaN NaN \n", "138 FENGZHI WU NaN NaN \n", "\n", " n_emails url_domains n_urls \n", "0 NaN [wordpress.com, marknkatz.com, gmu.edu, atlant... 16.0 \n", "1 NaN [onhealth.vn, onhealth.vn, onhealth.vn, onheal... 49.0 \n", "2 NaN [af.mil, gst.com, govtribe.com, sbir.gov, open... 
28.0 \n", "3 NaN [grfdt.com, linkedin.com, academia.edu, resear... 24.0 \n", "4 NaN [stiesyariahbengkalis.ac.id, lipi.go.id, cross... 17.0 \n", ".. ... ... ... \n", "134 NaN [unlam.edu.ar, unirioja.es, amazon.fr, abebook... 19.0 \n", "135 NaN [fix-itrite.com, muckrack.com, tumblr.com, dri... 11.0 \n", "136 NaN [wordpress.com, wordpress.com, linkedin.com, f... 11.0 \n", "137 NaN [publons.com, articulate.com, zenodo.org, orci... 15.0 \n", "138 NaN [springer.com, sciencedirect.com, sciencedirec... 23.0 \n", "\n", "[139 rows x 22 columns]" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exploded_sources = df[(df['url_domains'].str.len() > 10) & (df['n_works'] > 0) & (df['works_source'].str.len() == 1)].explode('works_source').reset_index(drop=True)\n", "exploded_sources" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...external_idseducationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emailsurl_domainsn_urls
10000-0002-2000-8339111Phòng khám tư nhân Hà NộiNaNNaNNaN[[Sức khỏe, https://onhealth.vn/], [Khám phụ k...NaN...NaNNaNNaN4Phòng khám tư nhân Hà NộiNaNNaNNaN[onhealth.vn, onhealth.vn, onhealth.vn, onheal...49.0
20000-0001-9293-2224111Juan CarlosGarcia HoyosMy name is Juan Carlos García Hoyos. I was bor...[Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /...[[Air Force Office of Scientific Research (WRI...NaN...NaN[[Faculty of Philosophy, History - Ph.D., Char...[[responsible for the Project Service Level Ag...20Juan Carlos Garcia HoyosNaNNaNNaN[af.mil, gst.com, govtribe.com, sbir.gov, open...28.0
30000-0003-3045-0056111AnandaMajumdarI am Ananda Majumdar, Child Care Educator at B...NaN[[Migration Scholar and Ananda , https://grfdt...NaN...NaN[[Education , B.Ed. After Degree , University ...[[General Coordinator- University of Alberta C...43Ananda MajumdarNaNNaNNaN[grfdt.com, linkedin.com, academia.edu, resear...24.0
40000-0003-1815-5732111JAS(Jurnal Akuntansi Syariah)JAS (Jurnal Akuntansi Syariah) published in pr...NaN[[Website, https://ejournal.stiesyariahbengkal...NaN...NaNNaNNaN67JAS (Jurnal Akuntansi Syariah)NaNNaNNaN[stiesyariahbengkalis.ac.id, lipi.go.id, cross...17.0
50000-0002-4379-6454111Caroline WanjiruKariukiCaroline holds a PhD in Economics from Curtin ...NaN[[Scopus Profile, https://www.scopus.com/dashb...NaN...NaN[[Economics, Doctor of Philosophy , Curtin Uni...[[Director, Educational Development, Strathmor...4Caroline Wanjiru KariukiNaNNaNNaN[scopus.com, mendeley.com, publons.com, resear...13.0
..................................................................
1320000-0001-6352-7086111SusanHawthorneSusan is a poet, novelist, publisher and Sansk...[S. Hawthorne, Susan C. C. Hawthorne][[Spinifex Press, http://www.spinifexpress.com...NaN...[[ResearcherID, K-6039-2018]][[School of Asian Studies, Honours Sanskrit, A...[[Adjunct Professor, James Cook University, To...352Susan HawthorneNaNNaNNaN[spinifexpress.com.au, linkedin.com, twitter.c...12.0
1330000-0002-4062-3603111JUAN DE DIOSBELTRÁN MANCILLAJUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut...[Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD...[[01.- Juan de Dios Beltrán Mancilla. Teoría O...NaN...NaN[[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR...[[INSPECTOR GENERAL JORNADA VESPERTINA // De 2...11JUAN DE DIOS BELTR´´ÁN MANCILLANaNNaNNaN[yumpu.com, ijopm.org, google.com, blogspot.co...69.0
1340000-0002-1324-7171111Vanesa NataliaRodriguezNombre y Apellido: Vanesa Natalia Rodriguez. ...[Vanesa Rodriguez, Vanesa N. Rodriguez][[De rufianes y franchutas Representaciones y ...NaN...NaN[[, Maestría en Ciencias Sociales con Mención ...[[Profesora, Universidad Nacional de La Matanz...7Vanesa Natalia RodriguezNaNNaNNaN[unlam.edu.ar, unirioja.es, amazon.fr, abebook...19.0
1360000-0003-2676-4431111BennySoewandiNaN[Benny Soewandi][[Conservation Efforts as a Result of Theoreti...NaN...NaNNaN[[Membership, Paguyuban Pelestarian Budaya Ban...2Benny SoewandiNaNNaNNaN[wordpress.com, wordpress.com, linkedin.com, f...11.0
1380000-0002-9874-1450111FENGZHIWUNaNNaN[[A Systematic Study on the Dynamic Softening ...NaN...NaNNaNNaN3FENGZHI WUNaNNaNNaN[springer.com, sciencedirect.com, sciencedirec...23.0
\n", "

108 rows × 22 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "1 0000-0002-2000-8339 1 1 1 \n", "2 0000-0001-9293-2224 1 1 1 \n", "3 0000-0003-3045-0056 1 1 1 \n", "4 0000-0003-1815-5732 1 1 1 \n", "5 0000-0002-4379-6454 1 1 1 \n", ".. ... ... ... ... \n", "132 0000-0001-6352-7086 1 1 1 \n", "133 0000-0002-4062-3603 1 1 1 \n", "134 0000-0002-1324-7171 1 1 1 \n", "136 0000-0003-2676-4431 1 1 1 \n", "138 0000-0002-9874-1450 1 1 1 \n", "\n", " given_names family_name \\\n", "1 Phòng khám tư nhân Hà Nội NaN \n", "2 Juan Carlos Garcia Hoyos \n", "3 Ananda Majumdar \n", "4 JAS (Jurnal Akuntansi Syariah) \n", "5 Caroline Wanjiru Kariuki \n", ".. ... ... \n", "132 Susan Hawthorne \n", "133 JUAN DE DIOS BELTRÁN MANCILLA \n", "134 Vanesa Natalia Rodriguez \n", "136 Benny Soewandi \n", "138 FENGZHI WU \n", "\n", " biography \\\n", "1 NaN \n", "2 My name is Juan Carlos García Hoyos. I was bor... \n", "3 I am Ananda Majumdar, Child Care Educator at B... \n", "4 JAS (Jurnal Akuntansi Syariah) published in pr... \n", "5 Caroline holds a PhD in Economics from Curtin ... \n", ".. ... \n", "132 Susan is a poet, novelist, publisher and Sansk... \n", "133 JUAN DE DIOS BELTRÁN MANCILLA (*) Filósofo aut... \n", "134 Nombre y Apellido: Vanesa Natalia Rodriguez. ... \n", "136 NaN \n", "138 NaN \n", "\n", " other_names \\\n", "1 NaN \n", "2 [Juan Carlos Garcia Hoyos /, EXTRATERRANOVAS /... \n", "3 NaN \n", "4 NaN \n", "5 NaN \n", ".. ... \n", "132 [S. Hawthorne, Susan C. C. Hawthorne] \n", "133 [Juan de Dios Beltrán Mancilla, FILÓSOFO AUTOD... \n", "134 [Vanesa Rodriguez, Vanesa N. Rodriguez] \n", "136 [Benny Soewandi] \n", "138 NaN \n", "\n", " urls primary_email ... \\\n", "1 [[Sức khỏe, https://onhealth.vn/], [Khám phụ k... NaN ... \n", "2 [[Air Force Office of Scientific Research (WRI... NaN ... \n", "3 [[Migration Scholar and Ananda , https://grfdt... NaN ... \n", "4 [[Website, https://ejournal.stiesyariahbengkal... NaN ... 
\n", "5 [[Scopus Profile, https://www.scopus.com/dashb... NaN ... \n", ".. ... ... ... \n", "132 [[Spinifex Press, http://www.spinifexpress.com... NaN ... \n", "133 [[01.- Juan de Dios Beltrán Mancilla. Teoría O... NaN ... \n", "134 [[De rufianes y franchutas Representaciones y ... NaN ... \n", "136 [[Conservation Efforts as a Result of Theoreti... NaN ... \n", "138 [[A Systematic Study on the Dynamic Softening ... NaN ... \n", "\n", " external_ids \\\n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "5 NaN \n", ".. ... \n", "132 [[ResearcherID, K-6039-2018]] \n", "133 NaN \n", "134 NaN \n", "136 NaN \n", "138 NaN \n", "\n", " education \\\n", "1 NaN \n", "2 [[Faculty of Philosophy, History - Ph.D., Char... \n", "3 [[Education , B.Ed. After Degree , University ... \n", "4 NaN \n", "5 [[Economics, Doctor of Philosophy , Curtin Uni... \n", ".. ... \n", "132 [[School of Asian Studies, Honours Sanskrit, A... \n", "133 [[, DIPLOMADO EN PRACTICAS DIRECTIVAS PARA OR... \n", "134 [[, Maestría en Ciencias Sociales con Mención ... \n", "136 NaN \n", "138 NaN \n", "\n", " employment n_works \\\n", "1 NaN 4 \n", "2 [[responsible for the Project Service Level Ag... 20 \n", "3 [[General Coordinator- University of Alberta C... 43 \n", "4 NaN 67 \n", "5 [[Director, Educational Development, Strathmor... 4 \n", ".. ... ... \n", "132 [[Adjunct Professor, James Cook University, To... 352 \n", "133 [[INSPECTOR GENERAL JORNADA VESPERTINA // De 2... 11 \n", "134 [[Profesora, Universidad Nacional de La Matanz... 7 \n", "136 [[Membership, Paguyuban Pelestarian Budaya Ban... 2 \n", "138 NaN 3 \n", "\n", " works_source primary_email_domain \\\n", "1 Phòng khám tư nhân Hà Nội NaN \n", "2 Juan Carlos Garcia Hoyos NaN \n", "3 Ananda Majumdar NaN \n", "4 JAS (Jurnal Akuntansi Syariah) NaN \n", "5 Caroline Wanjiru Kariuki NaN \n", ".. ... ... 
\n", "132 Susan Hawthorne NaN \n", "133 JUAN DE DIOS BELTR´´ÁN MANCILLA NaN \n", "134 Vanesa Natalia Rodriguez NaN \n", "136 Benny Soewandi NaN \n", "138 FENGZHI WU NaN \n", "\n", " other_email_domains n_emails \\\n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "5 NaN NaN \n", ".. ... ... \n", "132 NaN NaN \n", "133 NaN NaN \n", "134 NaN NaN \n", "136 NaN NaN \n", "138 NaN NaN \n", "\n", " url_domains n_urls \n", "1 [onhealth.vn, onhealth.vn, onhealth.vn, onheal... 49.0 \n", "2 [af.mil, gst.com, govtribe.com, sbir.gov, open... 28.0 \n", "3 [grfdt.com, linkedin.com, academia.edu, resear... 24.0 \n", "4 [stiesyariahbengkalis.ac.id, lipi.go.id, cross... 17.0 \n", "5 [scopus.com, mendeley.com, publons.com, resear... 13.0 \n", ".. ... ... \n", "132 [spinifexpress.com.au, linkedin.com, twitter.c... 12.0 \n", "133 [yumpu.com, ijopm.org, google.com, blogspot.co... 69.0 \n", "134 [unlam.edu.ar, unirioja.es, amazon.fr, abebook... 19.0 \n", "136 [wordpress.com, wordpress.com, linkedin.com, f... 11.0 \n", "138 [springer.com, sciencedirect.com, sciencedirec... 23.0 \n", "\n", "[108 rows x 22 columns]" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "exploded_sources[exploded_sources.apply(lambda x: x['works_source'].find(x['given_names']) >= 0, axis=1)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Works source" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Paste from Miriam" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## External IDs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "External IDs should come from reliable sources. ORCiD registrants cannot add them freely." 
] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "df['n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1.285292e+06\n", "mean 1.357162e+00\n", "std 6.607097e-01\n", "min 1.000000e+00\n", "25% 1.000000e+00\n", "50% 1.000000e+00\n", "75% 2.000000e+00\n", "max 8.000000e+01\n", "Name: n_ids, dtype: float64" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.n_ids.describe()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidclaimedverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_email...educationemploymentn_worksworks_sourceprimary_email_domainother_email_domainsn_emailsurl_domainsn_urlsn_ids
92287930000-0002-9554-6633111John AWilliamsNaNNaN[[Aston University profile page, https://resea...NaN...NaN[[, Aston University, Birmingham, , GB, 1722, ...91[Aston Research Explorer]NaNNaNNaN[aston.ac.uk]1.080.0
\n", "

1 rows × 23 columns

\n", "
" ], "text/plain": [ " orcid claimed verified_email verified_primary_email \\\n", "9228793 0000-0002-9554-6633 1 1 1 \n", "\n", " given_names family_name biography other_names \\\n", "9228793 John A Williams NaN NaN \n", "\n", " urls primary_email ... \\\n", "9228793 [[Aston University profile page, https://resea... NaN ... \n", "\n", " education employment n_works \\\n", "9228793 NaN [[, Aston University, Birmingham, , GB, 1722, ... 91 \n", "\n", " works_source primary_email_domain other_email_domains \\\n", "9228793 [Aston Research Explorer] NaN NaN \n", "\n", " n_emails url_domains n_urls n_ids \n", "9228793 NaN [aston.ac.uk] 1.0 80.0 \n", "\n", "[1 rows x 23 columns]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df.n_ids == df.n_ids.max()]" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "ids = df[['orcid', 'external_ids']].explode('external_ids').reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 64, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidexternal_idsprovider
130000-0001-5004-4608[Scopus Author ID, 40661094300]Scopus Author ID
240000-0001-5008-2479[Scopus Author ID, 12789856200]Scopus Author ID
250000-0001-5008-2479[Ciência ID, 2F1C-479B-B071]Ciência ID
310000-0001-5010-9539[Loop profile, 1098977]Loop profile
420000-0001-5013-6529[Scopus Author ID, 8986698300]Scopus Author ID
............
112036400000-0003-4985-9169[Scopus Author ID, 23972479900]Scopus Author ID
112036420000-0003-4986-2106[Scopus Author ID, 57189299099]Scopus Author ID
112036430000-0003-4986-2106[Loop profile, 947925]Loop profile
112036530000-0003-4990-3115[Loop profile, 991591]Loop profile
112036620000-0003-4994-6043[Scopus Author ID, 55220889800]Scopus Author ID
\n", "

1744349 rows × 3 columns

\n", "
" ], "text/plain": [ " orcid external_ids \\\n", "13 0000-0001-5004-4608 [Scopus Author ID, 40661094300] \n", "24 0000-0001-5008-2479 [Scopus Author ID, 12789856200] \n", "25 0000-0001-5008-2479 [Ciência ID, 2F1C-479B-B071] \n", "31 0000-0001-5010-9539 [Loop profile, 1098977] \n", "42 0000-0001-5013-6529 [Scopus Author ID, 8986698300] \n", "... ... ... \n", "11203640 0000-0003-4985-9169 [Scopus Author ID, 23972479900] \n", "11203642 0000-0003-4986-2106 [Scopus Author ID, 57189299099] \n", "11203643 0000-0003-4986-2106 [Loop profile, 947925] \n", "11203653 0000-0003-4990-3115 [Loop profile, 991591] \n", "11203662 0000-0003-4994-6043 [Scopus Author ID, 55220889800] \n", "\n", " provider \n", "13 Scopus Author ID \n", "24 Scopus Author ID \n", "25 Ciência ID \n", "31 Loop profile \n", "42 Scopus Author ID \n", "... ... \n", "11203640 Scopus Author ID \n", "11203642 Scopus Author ID \n", "11203643 Loop profile \n", "11203653 Loop profile \n", "11203662 Scopus Author ID \n", "\n", "[1744349 rows x 3 columns]" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ids[ids.provider.notna()]" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ "ids['provider'] = ids[ids.external_ids.notna()]['external_ids'].apply(lambda x: x[0])" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidexternal_idsprovider
130000-0001-5004-4608[Scopus Author ID, 40661094300]Scopus Author ID
240000-0001-5008-2479[Scopus Author ID, 12789856200]Scopus Author ID
250000-0001-5008-2479[Ciência ID, 2F1C-479B-B071]Ciência ID
310000-0001-5010-9539[Loop profile, 1098977]Loop profile
420000-0001-5013-6529[Scopus Author ID, 8986698300]Scopus Author ID
\n", "
" ], "text/plain": [ " orcid external_ids provider\n", "13 0000-0001-5004-4608 [Scopus Author ID, 40661094300] Scopus Author ID\n", "24 0000-0001-5008-2479 [Scopus Author ID, 12789856200] Scopus Author ID\n", "25 0000-0001-5008-2479 [Ciência ID, 2F1C-479B-B071] Ciência ID\n", "31 0000-0001-5010-9539 [Loop profile, 1098977] Loop profile\n", "42 0000-0001-5013-6529 [Scopus Author ID, 8986698300] Scopus Author ID" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ids[ids.provider.notna()].head()" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "Scopus Author ID", "ResearcherID", "Loop profile", "Ciência ID", "Researcher Name Resolver ID", "中国科学家在线", "ISNI", "GND", "Pitt ID", "Technical University of Denmark CWIS", "Sciprofile", "Researcher ID", "ID Dialnet", "Digital author ID", "Scopus Author ID: ", "AuthenticusID", "HKU ResearcherPage", "UOW Scholars", "CTI Vitae", "Scopus Author ID:", "HKUST Profile", "Scopus author ID", "Chalmers ID", "Scopus ID", "iAuthor", "Google Scholar", "AuthID", "DAI", "US EPA VIVO", "Digital Author ID (DAI)", "Scopus ID", "Authenticus", "Smithsonian Profiles", "GitHub", "eScientist", "VIVO Cornell", "ResearcherID:", "Digital Author ID", "Digital author ID (DAI)", "ID Dialnet:", "Dialnet ID", "KAKEN", "UNE Researcher ID", "ResearcherID: ", "ORCID", "Custom", "ORCID iD", "ScienceOpen", "ResearcherId", "Profile system identifier" ], "y": [ 1015175, 543545, 114316, 33870, 7810, 4794, 3054, 2825, 2672, 2486, 2441, 1417, 1167, 1079, 1076, 847, 740, 644, 581, 549, 521, 501, 430, 232, 212, 200, 175, 153, 146, 135, 127, 82, 61, 51, 49, 46, 39, 35, 34, 7, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": 
"#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { 
"colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], 
"scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", 
"#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "IDs provided" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = [\n", " go.Bar(\n", " x=ids.groupby('provider').count().sort_values('orcid', ascending=False).index,\n", " y=ids.groupby('provider').count().sort_values('orcid', ascending=False)['orcid']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='IDs provided',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([nan, 'Scopus Author ID', 'Ciência ID', 'Loop profile',\n", " 'ResearcherID', 'Researcher Name Resolver ID', 'UOW Scholars',\n", " '中国科学家在线', 'Pitt ID', 'AuthenticusID', 'Sciprofile', 'GND', 'ISNI',\n", " 'HKU ResearcherPage', 'CTI Vitae', 'Researcher ID', 'ID Dialnet',\n", " 'Digital author ID', 'HKUST Profile',\n", " 'Technical University of Denmark CWIS', 'Scopus Author ID: ',\n", " 'Digital Author ID (DAI)', 'Scopus Author ID:', 'Google Scholar',\n", " 'AuthID', 'Digital Author ID', 'iAuthor', 'US EPA VIVO', 'GitHub',\n", " 'Scopus author ID', 'Chalmers ID', 'Scopus ID', 'Authenticus',\n", " 'VIVO Cornell', 'Scopus ID', 'ScienceOpen',\n", " 'Smithsonian Profiles', 'ResearcherID:', 'DAI', 'eScientist',\n", " 'KAKEN', 'Digital author ID (DAI)', 'ORCID', 'ID Dialnet:',\n", " 'Dialnet ID', 'UNE Researcher ID', 'ResearcherID: ',\n", " 'Profile system identifier', 'Custom', 'ResearcherId', 'ORCID iD'],\n", " dtype=object)" ] }, "execution_count": 61, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.unique(ids['provider'])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Keywords" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [], "source": [ "df['n_keywords'] = df.keywords.str.len()" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidn_keywords
16813100000-0002-0673-0341154.0
77176990000-0002-7060-4112141.0
45976740000-0002-6075-3501140.0
20665800000-0002-4071-0301118.0
35310300000-0002-9638-8091115.0
.........
107470350000-0003-4998-1551NaN
107470360000-0003-4998-4111NaN
107470370000-0003-4998-6045NaN
107470380000-0003-4998-8868NaN
107470390000-0003-4999-7916NaN
\n", "

10744621 rows × 2 columns

\n", "
" ], "text/plain": [ " orcid n_keywords\n", "1681310 0000-0002-0673-0341 154.0\n", "7717699 0000-0002-7060-4112 141.0\n", "4597674 0000-0002-6075-3501 140.0\n", "2066580 0000-0002-4071-0301 118.0\n", "3531030 0000-0002-9638-8091 115.0\n", "... ... ...\n", "10747035 0000-0003-4998-1551 NaN\n", "10747036 0000-0003-4998-4111 NaN\n", "10747037 0000-0003-4998-6045 NaN\n", "10747038 0000-0003-4998-8868 NaN\n", "10747039 0000-0003-4999-7916 NaN\n", "\n", "[10744621 rows x 2 columns]" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "0000-0002-0673-0341", "0000-0002-7060-4112", "0000-0002-6075-3501", "0000-0002-4071-0301", "0000-0002-9638-8091", "0000-0002-4235-4259", "0000-0001-9462-5666", "0000-0003-0076-6287", "0000-0002-1878-9762", "0000-0001-6537-7683", "0000-0001-6307-6027", "0000-0003-2273-9888", "0000-0003-1799-0971", "0000-0001-5287-1949", "0000-0002-0937-7061", "0000-0001-9715-9357", "0000-0001-5696-1052", "0000-0003-2998-5520", "0000-0001-5869-2204", "0000-0002-0156-3580", "0000-0002-9625-6742", "0000-0002-8401-8018", "0000-0001-9985-1697", "0000-0003-4246-8579", "0000-0002-7710-0355", "0000-0002-8083-7382", "0000-0001-7654-5013", "0000-0001-6939-3859", "0000-0002-3061-3364", "0000-0003-2509-2549", "0000-0002-0463-0048", "0000-0001-5230-715X", "0000-0001-5458-7167", "0000-0001-9336-6850", "0000-0003-0209-180X", "0000-0002-8227-5387", "0000-0002-9381-2264", "0000-0003-3340-6413", "0000-0003-3584-6834", "0000-0002-2935-1934", "0000-0002-8644-8396", "0000-0002-8659-6321", "0000-0002-3123-3021", "0000-0001-5637-1124", "0000-0001-5167-7466", "0000-0002-8449-2211", "0000-0003-2532-2906", 
"0000-0002-3532-043X", "0000-0002-2683-4527", "0000-0003-4505-3678", "0000-0002-6347-9464", "0000-0003-4608-3844", "0000-0003-4374-6374", "0000-0003-4511-7942", "0000-0002-1103-9651", "0000-0003-3720-1183", "0000-0001-9280-6017", "0000-0003-4673-1063", "0000-0001-9586-0780", "0000-0002-5539-1761", "0000-0003-2550-1859", "0000-0002-8499-1045", "0000-0003-2218-1343", "0000-0002-5306-7781", "0000-0003-1863-0265", "0000-0002-8072-1152", "0000-0003-3342-6123", "0000-0002-2252-672X", "0000-0002-3907-3552", "0000-0001-6861-9561", "0000-0002-3597-3350", "0000-0002-5274-7742", "0000-0002-3186-8860", "0000-0001-8689-185X", "0000-0002-6282-0640", "0000-0003-3343-5660", "0000-0001-7133-7848", "0000-0003-4486-2684", "0000-0002-9014-2090", "0000-0001-6843-9325", "0000-0003-0097-4182", "0000-0003-1245-7705", "0000-0002-4432-3448", "0000-0001-7857-4133", "0000-0002-1294-2156", "0000-0001-8445-412X", "0000-0002-1411-3028", "0000-0003-3387-3193", "0000-0002-1545-7818", "0000-0002-9125-6022", "0000-0002-3898-9542", "0000-0002-3866-6460", "0000-0003-4283-2895", "0000-0003-4153-6779", "0000-0002-4598-2891", "0000-0002-0211-7195", "0000-0002-1770-9660", "0000-0002-1960-5857", "0000-0003-2640-6757", "0000-0002-5432-9595" ], "y": [ 154, 141, 140, 118, 115, 104, 98, 94, 92, 91, 88, 86, 84, 82, 78, 77, 76, 75, 74, 73, 71, 70, 69, 66, 64, 62, 61, 60, 58, 57, 56, 54, 53, 53, 52, 51, 51, 51, 51, 50, 50, 50, 50, 49, 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, 46, 46, 46, 45, 45, 44, 44, 44, 44, 44, 44, 43, 43, 42, 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, 41, 41, 40, 40, 40, 40, 40, 40, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 38, 38, 38, 38 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", 
"gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 
0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { 
"outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, 
"hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Keywords provided" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = [\n", " go.Bar(\n", " x=df.sort_values('n_keywords', ascending=False)['orcid'][:100],\n", " y=df.sort_values('n_keywords', ascending=False)['n_keywords'][:100]\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Keywords provided',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Correlation" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "coloraxis": "coloraxis", "hovertemplate": "x: %{x}
y: %{y}
color: %{z}", "name": "0", "type": "heatmap", "x": [ "claimed", "verified_email", "verified_primary_email", "n_works", "n_emails", "n_urls", "n_ids", "n_keywords" ], "xaxis": "x", "y": [ "claimed", "verified_email", "verified_primary_email", "n_works", "n_emails", "n_urls", "n_ids", "n_keywords" ], "yaxis": "y", "z": [ [ null, null, null, null, null, null, null, null ], [ null, 1, 0.9764600091179001, 0.06481728326324665, 0.011268135706995959, 0.016235518285109687, 0.08833871138587861, 0.017045184883982135 ], [ null, 0.9764600091179001, 1, 0.0659649633755603, 0.009337798958686118, 0.0168692743777146, 0.08953190210488794, 0.017626925761491493 ], [ null, 0.06481728326324665, 0.0659649633755603, 1, 0.05241643710974057, 0.05179828612278866, 0.23927720131732308, 0.030960689715636 ], [ null, 0.011268135706995959, 0.009337798958686118, 0.05241643710974057, 1, 0.11119920854300894, 0.04817263453943147, 0.04157758861961359 ], [ null, 0.016235518285109687, 0.0168692743777146, 0.05179828612278866, 0.11119920854300894, 1, 0.06925708918455128, 0.15926017909633472 ], [ null, 0.08833871138587861, 0.08953190210488794, 0.23927720131732308, 0.04817263453943147, 0.06925708918455128, 1, 0.06320236481237387 ], [ null, 0.017045184883982135, 0.017626925761491493, 0.030960689715636, 0.04157758861961359, 0.15926017909633472, 0.06320236481237387, 1 ] ] } ], "layout": { "coloraxis": { "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, 
"type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], 
[ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, 
"ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": 
"#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "xaxis": { "anchor": "y", "constrain": "domain", "domain": [ 0, 1 ], "scaleanchor": "y" }, "yaxis": { "anchor": "x", "autorange": "reversed", "constrain": "domain", "domain": [ 0, 1 ] } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = px.imshow(df[df.n_ids > 0].corr())\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }