From 31209807a8d407d85cbb87ef293958796f5e0bf3 Mon Sep 17 00:00:00 2001 From: "miriam.baglioni" Date: Fri, 26 Mar 2021 15:14:54 +0100 Subject: [PATCH] part of the exploratory analysis related to the authoritative works source --- notebooks/01.1-Exploration_WorksSource.ipynb | 2967 ++++++++++++++++++ 1 file changed, 2967 insertions(+) create mode 100644 notebooks/01.1-Exploration_WorksSource.ipynb diff --git a/notebooks/01.1-Exploration_WorksSource.ipynb b/notebooks/01.1-Exploration_WorksSource.ipynb new file mode 100644 index 0000000..4156dd5 --- /dev/null +++ b/notebooks/01.1-Exploration_WorksSource.ipynb @@ -0,0 +1,2967 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "subsequent-cornell", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import glob\n", + "\n", + "import pandas as pd\n", + "import ast\n", + "import tldextract\n", + "import numpy\n", + "\n", + "import plotly\n", + "from plotly.offline import iplot, init_notebook_mode\n", + "import plotly.graph_objs as go\n", + "import plotly.express as px\n", + "\n", + "init_notebook_mode(connected=True)\n", + "TOP_N = 0\n", + "TOP_RANGE = [0, 0]\n", + "def set_top_n(n):\n", + " global TOP_N, TOP_RANGE\n", + " TOP_N = n\n", + " TOP_RANGE = [-.5, n - 1 + .5]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "hydraulic-baker", + "metadata": {}, + "outputs": [], + "source": [ + "parts = glob.glob('/Users/miriam.baglioni/Develop/Gitea/fake-orcid-analysis-v2/fake-orcid-analysis/data/processed/dataset.pkl.*')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "lesbian-routine", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emails...employmentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabel
100000000000-0002-7790-048310abeleliasNaNNaNNaNNaNNaN...NaN0NaN2020-09-16t16:51:54.155z2020-09-16t17:00:08.451z00000
100000010000-0001-6368-053100abelardoramirezNaNNaNNaNNaNNaN...NaN0NaN2017-05-10t19:28:13.217z2017-05-10t19:28:17.315z00000
100000020000-0001-8149-490011abelardomancinasNaNNaNNaNNaNNaN...[[profesor investigador, instituto tecnológico...0NaN2018-10-15t21:46:52.162z2020-01-13t03:33:47.645z00000
100000030000-0002-8684-242200aberanigussieNaNNaNNaNNaNNaN...NaN0NaN2020-09-23t08:36:17.451z2020-09-23t08:36:17.450z00000
100000040000-0003-4814-787211abhijeetsinghNaNNaNNaNNaNNaN...NaN0NaN2018-05-01t22:43:17.407z2018-10-06t22:21:54.024z00000
\n", + "

5 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "10000000 0000-0002-7790-0483 1 0 \n", + "10000001 0000-0001-6368-0531 0 0 \n", + "10000002 0000-0001-8149-4900 1 1 \n", + "10000003 0000-0002-8684-2422 0 0 \n", + "10000004 0000-0003-4814-7872 1 1 \n", + "\n", + " given_names family_name biography other_names urls primary_email \\\n", + "10000000 abel elias NaN NaN NaN NaN \n", + "10000001 abelardo ramirez NaN NaN NaN NaN \n", + "10000002 abelardo mancinas NaN NaN NaN NaN \n", + "10000003 abera nigussie NaN NaN NaN NaN \n", + "10000004 abhijeet singh NaN NaN NaN NaN \n", + "\n", + " other_emails ... employment \\\n", + "10000000 NaN ... NaN \n", + "10000001 NaN ... NaN \n", + "10000002 NaN ... [[profesor investigador, instituto tecnológico... \n", + "10000003 NaN ... NaN \n", + "10000004 NaN ... NaN \n", + "\n", + " n_works works_source activation_date \\\n", + "10000000 0 NaN 2020-09-16t16:51:54.155z \n", + "10000001 0 NaN 2017-05-10t19:28:13.217z \n", + "10000002 0 NaN 2018-10-15t21:46:52.162z \n", + "10000003 0 NaN 2020-09-23t08:36:17.451z \n", + "10000004 0 NaN 2018-05-01t22:43:17.407z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc n_other_pids label \n", + "10000000 2020-09-16t17:00:08.451z 0 0 0 0 0 \n", + "10000001 2017-05-10t19:28:17.315z 0 0 0 0 0 \n", + "10000002 2020-01-13t03:33:47.645z 0 0 0 0 0 \n", + "10000003 2020-09-23t08:36:17.450z 0 0 0 0 0 \n", + "10000004 2018-10-06t22:21:54.024z 0 0 0 0 0 \n", + "\n", + "[5 rows x 23 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.concat((pd.read_pickle(part) for part in parts))\n", + "df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "olympic-missile", + "metadata": {}, + "outputs": [], + "source": [ + "def remove_own_source(lst, given, family):\n", + " res = []\n", + " if isinstance(lst, list) and pd.notna(given):\n", + " for ws in lst:\n", + " if ws.lower().find(given.lower()) == -1:\n", + " if pd.notna(family):\n", + " if ws.lower().find(family.lower()) == -1:\n", + " res.append(ws)\n", + " else:\n", + " res.append(ws)\n", + " return res" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "informational-carrier", + "metadata": {}, + "outputs": [], + "source": [ + "df['ext_works_source'] = df.apply(lambda x: remove_own_source(x['works_source'], x['given_names'], x['family_name']), axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "hydraulic-pharmaceutical", + "metadata": {}, + "outputs": [], + "source": [ + "df['n_ext_work_source'] = df.ext_works_source.str.len()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "tropical-stockholm", + "metadata": {}, + "outputs": [], + "source": [ + "exploded_external_sources = df[df['ext_works_source'].str.len() > 0][['orcid','ext_works_source']].explode('ext_works_source').reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "enhanced-blanket", + "metadata": {}, + "outputs": [], + "source": [ + "grouped_ext_sources = exploded_external_sources.groupby('ext_works_source').count().sort_values('orcid', ascending=False).reset_index()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "black-congo", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "crossref", + "scopus - elsevier", + "crossref metadata search", + "multidisciplinary digital publishing institute", + "europe pubmed central", + "researcherid", + "publons", + "ciênciavitae", + "base - bielefeld academic search engine", + "datacite", + "redalyc", + "mla international bibliography", + "deutsche nationalbibliothek (dnb)", + "nasa astrophysics data system", + "national information processing institute ", + "f1000", + "inspire-hep", + "university of helsinki", + "hal", + "igi global", + "airiti", + "university of copenhagen", + "universidade federal de uberlândia", + "aarhus university", + "universidad del país vasco", + "university of manchester - pure", + "kings college london", + "university of southern denmark", + "wellcome open research", + "macquarie university" + ], + "y": [ + 1460841, + 902231, + 297684, + 281664, + 181605, + 158148, + 39786, + 32315, + 20699, + 16107, + 9640, + 8059, + 7855, + 7403, + 6509, + 5221, + 4872, + 4152, + 4136, + 3833, + 3725, + 3127, + 2718, + 2311, + 2271, + 2227, + 2199, + 2185, + 2113, + 2053 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Top 30 works_source" + }, + "xaxis": { + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "data = [\n", + " go.Bar(\n", + " x=grouped_ext_sources[:30].ext_works_source,\n", + " y=grouped_ext_sources[:30].orcid\n", + " )\n", + "]\n", + "\n", + "layout = go.Layout(\n", + " title='Top 30 works_source',\n", + " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", + ")\n", + "fig = go.Figure(data=data, layout=layout)\n", + "plotly.offline.iplot(fig)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "sophisticated-madness", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ext_works_sourceorcid
0crossref1460841
1scopus - elsevier902231
2crossref metadata search297684
3multidisciplinary digital publishing institute281664
4europe pubmed central181605
.........
337uta - oa journal global insight3
338francis crick institute3
339anna3
340santos3
341universitäts- und stadtbibliothek köln3
\n", + "

342 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " ext_works_source orcid\n", + "0 crossref 1460841\n", + "1 scopus - elsevier 902231\n", + "2 crossref metadata search 297684\n", + "3 multidisciplinary digital publishing institute 281664\n", + "4 europe pubmed central 181605\n", + ".. ... ...\n", + "337 uta - oa journal global insight 3\n", + "338 francis crick institute 3\n", + "339 anna 3\n", + "340 santos 3\n", + "341 universitäts- und stadtbibliothek köln 3\n", + "\n", + "[342 rows x 2 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "authoritative_sources = grouped_ext_sources[grouped_ext_sources['orcid'] > 2]\n", + "authoritative_sources" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "fifty-translator", + "metadata": {}, + "outputs": [], + "source": [ + "exploded_external_sources['authoritative'] = exploded_external_sources.ext_works_source.isin(authoritative_sources['ext_works_source'])" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "sweet-silicon", + "metadata": {}, + "outputs": [], + "source": [ + "orcid_authoritative_source = exploded_external_sources.groupby('orcid')['authoritative'].any().reset_index()[['orcid', 'authoritative']]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "iraqi-million", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.set_index('orcid').join(orcid_authoritative_source.set_index('orcid')).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "current-convergence", + "metadata": {}, + "outputs": [], + "source": [ + "df.loc[df.authoritative.isna(), 'authoritative'] = False" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "median-smith", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emails...activation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelext_works_sourcen_ext_work_sourceauthoritative
00000-0002-7790-048310abeleliasNaNNaNNaNNaNNaN...2020-09-16t16:51:54.155z2020-09-16t17:00:08.451z00000[]0False
10000-0001-6368-053100abelardoramirezNaNNaNNaNNaNNaN...2017-05-10t19:28:13.217z2017-05-10t19:28:17.315z00000[]0False
20000-0001-8149-490011abelardomancinasNaNNaNNaNNaNNaN...2018-10-15t21:46:52.162z2020-01-13t03:33:47.645z00000[]0False
30000-0002-8684-242200aberanigussieNaNNaNNaNNaNNaN...2020-09-23t08:36:17.451z2020-09-23t08:36:17.450z00000[]0False
40000-0003-4814-787211abhijeetsinghNaNNaNNaNNaNNaN...2018-05-01t22:43:17.407z2018-10-06t22:21:54.024z00000[]0False
..................................................................
109896440000-0001-7468-988111abeerelbaroudiNaNNaNNaNNaNNaN...2020-02-06t15:04:42.485z2020-02-06t15:16:45.537z00000[]0False
109896450000-0003-0081-428511abeersohrabNaNNaNNaNNaNNaN...2020-05-12t22:39:26.356z2020-05-12t22:41:45.239z00000[]0False
109896460000-0003-2004-345700abeerabdelmaksoudNaNNaNNaNNaNNaN...2019-12-19t23:09:12.579z2019-12-19t23:09:12.798z00000[]0False
109896470000-0003-2841-975411abeeral-ghazaliNaNNaNNaNNaNNaN...2019-06-02t18:35:32.973z2019-08-05t14:54:41.796z20021[crossref metadata search]1True
109896480000-0002-3675-687600abegailpalos-simbreNaN[gail]NaNNaNNaN...2017-02-10t16:38:52.988z2019-12-11t01:37:15.405z00000[]0False
\n", + "

10989649 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " orcid verified_email verified_primary_email \\\n", + "0 0000-0002-7790-0483 1 0 \n", + "1 0000-0001-6368-0531 0 0 \n", + "2 0000-0001-8149-4900 1 1 \n", + "3 0000-0002-8684-2422 0 0 \n", + "4 0000-0003-4814-7872 1 1 \n", + "... ... ... ... \n", + "10989644 0000-0001-7468-9881 1 1 \n", + "10989645 0000-0003-0081-4285 1 1 \n", + "10989646 0000-0003-2004-3457 0 0 \n", + "10989647 0000-0003-2841-9754 1 1 \n", + "10989648 0000-0002-3675-6876 0 0 \n", + "\n", + " given_names family_name biography other_names urls primary_email \\\n", + "0 abel elias NaN NaN NaN NaN \n", + "1 abelardo ramirez NaN NaN NaN NaN \n", + "2 abelardo mancinas NaN NaN NaN NaN \n", + "3 abera nigussie NaN NaN NaN NaN \n", + "4 abhijeet singh NaN NaN NaN NaN \n", + "... ... ... ... ... ... ... \n", + "10989644 abeer elbaroudi NaN NaN NaN NaN \n", + "10989645 abeer sohrab NaN NaN NaN NaN \n", + "10989646 abeer abdelmaksoud NaN NaN NaN NaN \n", + "10989647 abeer al-ghazali NaN NaN NaN NaN \n", + "10989648 abegail palos-simbre NaN [gail] NaN NaN \n", + "\n", + " other_emails ... activation_date \\\n", + "0 NaN ... 2020-09-16t16:51:54.155z \n", + "1 NaN ... 2017-05-10t19:28:13.217z \n", + "2 NaN ... 2018-10-15t21:46:52.162z \n", + "3 NaN ... 2020-09-23t08:36:17.451z \n", + "4 NaN ... 2018-05-01t22:43:17.407z \n", + "... ... ... ... \n", + "10989644 NaN ... 2020-02-06t15:04:42.485z \n", + "10989645 NaN ... 2020-05-12t22:39:26.356z \n", + "10989646 NaN ... 2019-12-19t23:09:12.579z \n", + "10989647 NaN ... 2019-06-02t18:35:32.973z \n", + "10989648 NaN ... 2017-02-10t16:38:52.988z \n", + "\n", + " last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n", + "0 2020-09-16t17:00:08.451z 0 0 0 0 0 \n", + "1 2017-05-10t19:28:17.315z 0 0 0 0 0 \n", + "2 2020-01-13t03:33:47.645z 0 0 0 0 0 \n", + "3 2020-09-23t08:36:17.450z 0 0 0 0 0 \n", + "4 2018-10-06t22:21:54.024z 0 0 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "10989644 2020-02-06t15:16:45.537z 0 0 0 0 0 \n", + "10989645 2020-05-12t22:41:45.239z 0 0 0 0 0 \n", + "10989646 2019-12-19t23:09:12.798z 0 0 0 0 0 \n", + "10989647 2019-08-05t14:54:41.796z 2 0 0 2 1 \n", + "10989648 2019-12-11t01:37:15.405z 0 0 0 0 0 \n", + "\n", + " ext_works_source n_ext_work_source authoritative \n", + "0 [] 0 False \n", + "1 [] 0 False \n", + "2 [] 0 False \n", + "3 [] 0 False \n", + "4 [] 0 False \n", + "... ... ... ... \n", + "10989644 [] 0 False \n", + "10989645 [] 0 False \n", + "10989646 [] 0 False \n", + "10989647 [crossref metadata search] 1 True \n", + "10989648 [] 0 False \n", + "\n", + "[10989649 rows x 26 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "veterinary-phrase", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "coloraxis": "coloraxis", + "hovertemplate": "x: %{x}
y: %{y}
color: %{z}", + "name": "0", + "type": "heatmap", + "x": [ + "verified_email", + "verified_primary_email", + "n_works", + "n_doi", + "n_arxiv", + "n_pmc", + "n_other_pids", + "label", + "n_ext_work_source", + "authoritative" + ], + "xaxis": "x", + "y": [ + "verified_email", + "verified_primary_email", + "n_works", + "n_doi", + "n_arxiv", + "n_pmc", + "n_other_pids", + "label", + "n_ext_work_source", + "authoritative" + ], + "yaxis": "y", + "z": [ + [ + 1, + 0.9649829131837351, + 0.07899833525810977, + 0.07259719921935899, + 0.006461363868256276, + 0.030614701011724168, + 0.06062464201233044, + 0.1531839773366329, + 0.1919719557229596, + 0.21531668352175948 + ], + [ + 0.9649829131837351, + 1, + 0.08183974046701105, + 0.07518160639621922, + 0.006686059029180166, + 0.03171235345994569, + 0.06277678931007252, + 0.15995695182918668, + 0.1981210698185993, + 0.22184413814951587 + ], + [ + 0.07899833525810977, + 0.08183974046701105, + 1, + 0.9378726254396619, + 0.31262992500470826, + 0.3510856389397645, + 0.8353346326814892, + 0.22974076078503264, + 0.42502019390055656, + 0.2990392382833506 + ], + [ + 0.07259719921935899, + 0.07518160639621922, + 0.9378726254396619, + 1, + 0.35605399617713956, + 0.3624050122938356, + 0.801819617534692, + 0.2133388352039022, + 0.41375193880464456, + 0.28780401348168333 + ], + [ + 0.006461363868256276, + 0.006686059029180166, + 0.31262992500470826, + 0.35605399617713956, + 1, + 0.0009072282179230607, + 0.2420914875525837, + 0.01939797095250517, + 0.021262173261030495, + 0.02440100048344857 + ], + [ + 0.030614701011724168, + 0.03171235345994569, + 0.3510856389397645, + 0.3624050122938356, + 0.0009072282179230607, + 1, + 0.2570742999530638, + 0.08736856703205036, + 0.16873991088778023, + 0.11447380021013033 + ], + [ + 0.06062464201233044, + 0.06277678931007252, + 0.8353346326814892, + 0.801819617534692, + 0.2420914875525837, + 0.2570742999530638, + 1, + 0.17528852589870983, + 0.3572799642364996, + 0.24303586233733107 + ], + [ + 0.1531839773366329, + 0.15995695182918668, + 0.22974076078503264, + 0.2133388352039022, + 0.01939797095250517, + 0.08736856703205036, + 0.17528852589870983, + 1, + 0.49221037696497033, + 0.5245689815824116 + ], + [ + 0.1919719557229596, + 0.1981210698185993, + 0.42502019390055656, + 0.41375193880464456, + 0.021262173261030495, + 0.16873991088778023, + 0.3572799642364996, + 0.49221037696497033, + 1, + 0.8380242299586107 + ], + [ + 0.21531668352175948, + 0.22184413814951587, + 0.2990392382833506, + 0.28780401348168333, + 0.02440100048344857, + 0.11447380021013033, + 0.24303586233733107, + 0.5245689815824116, + 0.8380242299586107, + 1 + ] + ] + } + ], + "layout": { + "coloraxis": { + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "constrain": "domain", + "domain": [ + 0, + 1 + ], + "scaleanchor": "y" + }, + "yaxis": { + "anchor": "x", + "autorange": "reversed", + "constrain": "domain", + "domain": [ + 0, + 1 + ] + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.imshow(df.fillna(0).corr())\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "outer-egyptian", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}