diff --git a/notebooks/01.1-Exploration_WorksSource.ipynb b/notebooks/01.1-Exploration_WorksSource.ipynb
new file mode 100644
index 0000000..4156dd5
--- /dev/null
+++ b/notebooks/01.1-Exploration_WorksSource.ipynb
@@ -0,0 +1,2967 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "subsequent-cornell",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import glob\n",
+ "\n",
+ "import pandas as pd\n",
+ "import ast\n",
+ "import tldextract\n",
+ "import numpy\n",
+ "\n",
+ "import plotly\n",
+ "from plotly.offline import iplot, init_notebook_mode\n",
+ "import plotly.graph_objs as go\n",
+ "import plotly.express as px\n",
+ "\n",
+ "# Initialise plotly's offline notebook mode before any figure is drawn.\n",
+ "init_notebook_mode(connected=True)\n",
+ "\n",
+ "# Module-level plotting state; both values are updated together by set_top_n().\n",
+ "TOP_N, TOP_RANGE = 0, [0, 0]\n",
+ "\n",
+ "\n",
+ "def set_top_n(n):\n",
+ "    \"\"\"Set the globals TOP_N and TOP_RANGE from ``n``.\n",
+ "\n",
+ "    TOP_N becomes ``n`` and TOP_RANGE becomes ``[-0.5, n - 1 + 0.5]``, i.e. half\n",
+ "    a unit of padding around the positions ``0 .. n - 1`` (presumably used as an\n",
+ "    axis range for top-N bar charts; confirm against the plotting cells).\n",
+ "    \"\"\"\n",
+ "    global TOP_N, TOP_RANGE\n",
+ "    TOP_N, TOP_RANGE = n, [-0.5, n - 1 + 0.5]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "hydraulic-baker",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Pickled dataset chunks live under <repo>/data/processed. The path is relative to\n",
+ "# this notebook's directory instead of one developer's home folder.\n",
+ "# NOTE(review): assumes the kernel's working directory is notebooks/ - confirm.\n",
+ "DATA_DIR = '../data/processed'\n",
+ "\n",
+ "# glob returns matches in arbitrary order; sort so the concatenated frame is reproducible.\n",
+ "parts = sorted(glob.glob(DATA_DIR + '/dataset.pkl.*'))\n",
+ "assert parts, 'no dataset.pkl.* chunks found in ' + DATA_DIR"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "lesbian-routine",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " orcid | \n",
+ " verified_email | \n",
+ " verified_primary_email | \n",
+ " given_names | \n",
+ " family_name | \n",
+ " biography | \n",
+ " other_names | \n",
+ " urls | \n",
+ " primary_email | \n",
+ " other_emails | \n",
+ " ... | \n",
+ " employment | \n",
+ " n_works | \n",
+ " works_source | \n",
+ " activation_date | \n",
+ " last_update_date | \n",
+ " n_doi | \n",
+ " n_arxiv | \n",
+ " n_pmc | \n",
+ " n_other_pids | \n",
+ " label | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 10000000 | \n",
+ " 0000-0002-7790-0483 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " abel | \n",
+ " elias | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2020-09-16t16:51:54.155z | \n",
+ " 2020-09-16t17:00:08.451z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10000001 | \n",
+ " 0000-0001-6368-0531 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " abelardo | \n",
+ " ramirez | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2017-05-10t19:28:13.217z | \n",
+ " 2017-05-10t19:28:17.315z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10000002 | \n",
+ " 0000-0001-8149-4900 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abelardo | \n",
+ " mancinas | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " [[profesor investigador, instituto tecnológico... | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2018-10-15t21:46:52.162z | \n",
+ " 2020-01-13t03:33:47.645z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10000003 | \n",
+ " 0000-0002-8684-2422 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " abera | \n",
+ " nigussie | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2020-09-23t08:36:17.451z | \n",
+ " 2020-09-23t08:36:17.450z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10000004 | \n",
+ " 0000-0003-4814-7872 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abhijeet | \n",
+ " singh | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " NaN | \n",
+ " 0 | \n",
+ " NaN | \n",
+ " 2018-05-01t22:43:17.407z | \n",
+ " 2018-10-06t22:21:54.024z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " orcid verified_email verified_primary_email \\\n",
+ "10000000 0000-0002-7790-0483 1 0 \n",
+ "10000001 0000-0001-6368-0531 0 0 \n",
+ "10000002 0000-0001-8149-4900 1 1 \n",
+ "10000003 0000-0002-8684-2422 0 0 \n",
+ "10000004 0000-0003-4814-7872 1 1 \n",
+ "\n",
+ " given_names family_name biography other_names urls primary_email \\\n",
+ "10000000 abel elias NaN NaN NaN NaN \n",
+ "10000001 abelardo ramirez NaN NaN NaN NaN \n",
+ "10000002 abelardo mancinas NaN NaN NaN NaN \n",
+ "10000003 abera nigussie NaN NaN NaN NaN \n",
+ "10000004 abhijeet singh NaN NaN NaN NaN \n",
+ "\n",
+ " other_emails ... employment \\\n",
+ "10000000 NaN ... NaN \n",
+ "10000001 NaN ... NaN \n",
+ "10000002 NaN ... [[profesor investigador, instituto tecnológico... \n",
+ "10000003 NaN ... NaN \n",
+ "10000004 NaN ... NaN \n",
+ "\n",
+ " n_works works_source activation_date \\\n",
+ "10000000 0 NaN 2020-09-16t16:51:54.155z \n",
+ "10000001 0 NaN 2017-05-10t19:28:13.217z \n",
+ "10000002 0 NaN 2018-10-15t21:46:52.162z \n",
+ "10000003 0 NaN 2020-09-23t08:36:17.451z \n",
+ "10000004 0 NaN 2018-05-01t22:43:17.407z \n",
+ "\n",
+ " last_update_date n_doi n_arxiv n_pmc n_other_pids label \n",
+ "10000000 2020-09-16t17:00:08.451z 0 0 0 0 0 \n",
+ "10000001 2017-05-10t19:28:17.315z 0 0 0 0 0 \n",
+ "10000002 2020-01-13t03:33:47.645z 0 0 0 0 0 \n",
+ "10000003 2020-09-23t08:36:17.450z 0 0 0 0 0 \n",
+ "10000004 2018-10-06t22:21:54.024z 0 0 0 0 0 \n",
+ "\n",
+ "[5 rows x 23 columns]"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Read every pickled chunk and stack them into one frame (original indexes are kept).\n",
+ "# NOTE(review): unpickling can execute arbitrary code - only load trusted files.\n",
+ "df = pd.concat([pd.read_pickle(chunk_path) for chunk_path in parts])\n",
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "olympic-missile",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def remove_own_source(lst, given, family):\n",
+ "    \"\"\"Return the entries of ``lst`` that do not contain the owner's own name.\n",
+ "\n",
+ "    A source is dropped when it contains ``given`` or ``family`` as a\n",
+ "    case-insensitive substring.\n",
+ "\n",
+ "    Args:\n",
+ "        lst: list of source names; any other value (e.g. NaN) yields ``[]``.\n",
+ "        given: given name, or a missing value.\n",
+ "        family: family name, or a missing value.\n",
+ "\n",
+ "    Returns:\n",
+ "        A new list with the remaining sources, in their original order.\n",
+ "    \"\"\"\n",
+ "    if not isinstance(lst, list):\n",
+ "        return []\n",
+ "    # Only non-blank string names can be matched: a missing name is skipped (instead\n",
+ "    # of discarding every source) and '' is skipped because it is a substring of any text.\n",
+ "    own_names = [name.lower() for name in (given, family) if isinstance(name, str) and name.strip()]\n",
+ "    return [\n",
+ "        ws for ws in lst\n",
+ "        if isinstance(ws, str) and not any(name in ws.lower() for name in own_names)\n",
+ "    ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "informational-carrier",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Iterate the three columns in lockstep rather than df.apply(axis=1), which builds a\n",
+ "# Series for every row; the resulting lists are assigned positionally, one per row.\n",
+ "df['ext_works_source'] = [\n",
+ "    remove_own_source(sources, given, family)\n",
+ "    for sources, given, family in zip(df['works_source'], df['given_names'], df['family_name'])\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "hydraulic-pharmaceutical",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Number of external (non-self) sources per profile.\n",
+ "df['n_ext_work_source'] = df['ext_works_source'].str.len()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "tropical-stockholm",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# One row per (orcid, external source) pair, restricted to profiles that have at least one.\n",
+ "exploded_external_sources = (\n",
+ "    df.loc[df['ext_works_source'].str.len() > 0, ['orcid', 'ext_works_source']]\n",
+ "    .explode('ext_works_source')\n",
+ "    .reset_index(drop=True)\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "enhanced-blanket",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Rows per external source, most frequent first; after count() the 'orcid' column\n",
+ "# holds the number of rows for that source.\n",
+ "grouped_ext_sources = (\n",
+ "    exploded_external_sources\n",
+ "    .groupby('ext_works_source')\n",
+ "    .count()\n",
+ "    .sort_values('orcid', ascending=False)\n",
+ "    .reset_index()\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "black-congo",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "linkText": "Export to plot.ly",
+ "plotlyServerURL": "https://plot.ly",
+ "showLink": false
+ },
+ "data": [
+ {
+ "type": "bar",
+ "x": [
+ "crossref",
+ "scopus - elsevier",
+ "crossref metadata search",
+ "multidisciplinary digital publishing institute",
+ "europe pubmed central",
+ "researcherid",
+ "publons",
+ "ciênciavitae",
+ "base - bielefeld academic search engine",
+ "datacite",
+ "redalyc",
+ "mla international bibliography",
+ "deutsche nationalbibliothek (dnb)",
+ "nasa astrophysics data system",
+ "national information processing institute ",
+ "f1000",
+ "inspire-hep",
+ "university of helsinki",
+ "hal",
+ "igi global",
+ "airiti",
+ "university of copenhagen",
+ "universidade federal de uberlândia",
+ "aarhus university",
+ "universidad del país vasco",
+ "university of manchester - pure",
+ "kings college london",
+ "university of southern denmark",
+ "wellcome open research",
+ "macquarie university"
+ ],
+ "y": [
+ 1460841,
+ 902231,
+ 297684,
+ 281664,
+ 181605,
+ 158148,
+ 39786,
+ 32315,
+ 20699,
+ 16107,
+ 9640,
+ 8059,
+ 7855,
+ 7403,
+ 6509,
+ 5221,
+ 4872,
+ 4152,
+ 4136,
+ 3833,
+ 3725,
+ 3127,
+ 2718,
+ 2311,
+ 2271,
+ 2227,
+ 2199,
+ 2185,
+ 2113,
+ 2053
+ ]
+ }
+ ],
+ "layout": {
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "Top 30 works_source"
+ },
+ "xaxis": {
+ "tickangle": 45,
+ "tickfont": {
+ "size": 12
+ }
+ }
+ }
+ },
+ "text/html": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# Bar chart of the 30 most frequent external sources.\n",
+ "layout = go.Layout(\n",
+ "    title='Top 30 works_source',\n",
+ "    xaxis={'tickangle': 45, 'tickfont': {'size': 12}},\n",
+ ")\n",
+ "data = [\n",
+ "    go.Bar(\n",
+ "        x=grouped_ext_sources.head(30)['ext_works_source'],\n",
+ "        y=grouped_ext_sources.head(30)['orcid'],\n",
+ "    )\n",
+ "]\n",
+ "fig = go.Figure(data=data, layout=layout)\n",
+ "iplot(fig)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "sophisticated-madness",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ext_works_source | \n",
+ " orcid | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " crossref | \n",
+ " 1460841 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " scopus - elsevier | \n",
+ " 902231 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " crossref metadata search | \n",
+ " 297684 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " multidisciplinary digital publishing institute | \n",
+ " 281664 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " europe pubmed central | \n",
+ " 181605 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 337 | \n",
+ " uta - oa journal global insight | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 338 | \n",
+ " francis crick institute | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 339 | \n",
+ " anna | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 340 | \n",
+ " santos | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " 341 | \n",
+ " universitäts- und stadtbibliothek köln | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
342 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ext_works_source orcid\n",
+ "0 crossref 1460841\n",
+ "1 scopus - elsevier 902231\n",
+ "2 crossref metadata search 297684\n",
+ "3 multidisciplinary digital publishing institute 281664\n",
+ "4 europe pubmed central 181605\n",
+ ".. ... ...\n",
+ "337 uta - oa journal global insight 3\n",
+ "338 francis crick institute 3\n",
+ "339 anna 3\n",
+ "340 santos 3\n",
+ "341 universitäts- und stadtbibliothek köln 3\n",
+ "\n",
+ "[342 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Keep the sources that occur more than twice.\n",
+ "# NOTE(review): the saved output still lists entries such as 'anna' and 'santos',\n",
+ "# which look like personal names rather than institutions - confirm the threshold.\n",
+ "authoritative_sources = grouped_ext_sources.loc[grouped_ext_sources['orcid'] > 2]\n",
+ "authoritative_sources"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "fifty-translator",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Flag each (orcid, source) row whose source is in the authoritative list.\n",
+ "exploded_external_sources['authoritative'] = exploded_external_sources['ext_works_source'].isin(\n",
+ "    authoritative_sources['ext_works_source']\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "sweet-silicon",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A profile is authoritative when at least one of its external sources is.\n",
+ "orcid_authoritative_source = (\n",
+ "    exploded_external_sources\n",
+ "    .groupby('orcid')['authoritative']\n",
+ "    .any()\n",
+ "    .reset_index()\n",
+ "    [['orcid', 'authoritative']]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "iraqi-million",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Attach the per-profile flag to df. Any 'authoritative' column left by a previous run\n",
+ "# is dropped first, so re-running this cell does not fail on overlapping column names.\n",
+ "df = (\n",
+ "    df.drop(columns='authoritative', errors='ignore')\n",
+ "    .set_index('orcid')\n",
+ "    .join(orcid_authoritative_source.set_index('orcid'))\n",
+ "    .reset_index()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "current-convergence",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Profiles with no external source found no match in the join and hold NaN; mark them False.\n",
+ "df.loc[df['authoritative'].isna(), 'authoritative'] = False"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "median-smith",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " orcid | \n",
+ " verified_email | \n",
+ " verified_primary_email | \n",
+ " given_names | \n",
+ " family_name | \n",
+ " biography | \n",
+ " other_names | \n",
+ " urls | \n",
+ " primary_email | \n",
+ " other_emails | \n",
+ " ... | \n",
+ " activation_date | \n",
+ " last_update_date | \n",
+ " n_doi | \n",
+ " n_arxiv | \n",
+ " n_pmc | \n",
+ " n_other_pids | \n",
+ " label | \n",
+ " ext_works_source | \n",
+ " n_ext_work_source | \n",
+ " authoritative | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0000-0002-7790-0483 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " abel | \n",
+ " elias | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2020-09-16t16:51:54.155z | \n",
+ " 2020-09-16t17:00:08.451z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0000-0001-6368-0531 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " abelardo | \n",
+ " ramirez | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2017-05-10t19:28:13.217z | \n",
+ " 2017-05-10t19:28:17.315z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0000-0001-8149-4900 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abelardo | \n",
+ " mancinas | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2018-10-15t21:46:52.162z | \n",
+ " 2020-01-13t03:33:47.645z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0000-0002-8684-2422 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " abera | \n",
+ " nigussie | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2020-09-23t08:36:17.451z | \n",
+ " 2020-09-23t08:36:17.450z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0000-0003-4814-7872 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abhijeet | \n",
+ " singh | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2018-05-01t22:43:17.407z | \n",
+ " 2018-10-06t22:21:54.024z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10989644 | \n",
+ " 0000-0001-7468-9881 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abeer | \n",
+ " elbaroudi | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2020-02-06t15:04:42.485z | \n",
+ " 2020-02-06t15:16:45.537z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 10989645 | \n",
+ " 0000-0003-0081-4285 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abeer | \n",
+ " sohrab | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2020-05-12t22:39:26.356z | \n",
+ " 2020-05-12t22:41:45.239z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 10989646 | \n",
+ " 0000-0003-2004-3457 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " abeer | \n",
+ " abdelmaksoud | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2019-12-19t23:09:12.579z | \n",
+ " 2019-12-19t23:09:12.798z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ " 10989647 | \n",
+ " 0000-0003-2841-9754 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ " abeer | \n",
+ " al-ghazali | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2019-06-02t18:35:32.973z | \n",
+ " 2019-08-05t14:54:41.796z | \n",
+ " 2 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " [crossref metadata search] | \n",
+ " 1 | \n",
+ " True | \n",
+ "
\n",
+ " \n",
+ " 10989648 | \n",
+ " 0000-0002-3675-6876 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " abegail | \n",
+ " palos-simbre | \n",
+ " NaN | \n",
+ " [gail] | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 2017-02-10t16:38:52.988z | \n",
+ " 2019-12-11t01:37:15.405z | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " [] | \n",
+ " 0 | \n",
+ " False | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10989649 rows × 26 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " orcid verified_email verified_primary_email \\\n",
+ "0 0000-0002-7790-0483 1 0 \n",
+ "1 0000-0001-6368-0531 0 0 \n",
+ "2 0000-0001-8149-4900 1 1 \n",
+ "3 0000-0002-8684-2422 0 0 \n",
+ "4 0000-0003-4814-7872 1 1 \n",
+ "... ... ... ... \n",
+ "10989644 0000-0001-7468-9881 1 1 \n",
+ "10989645 0000-0003-0081-4285 1 1 \n",
+ "10989646 0000-0003-2004-3457 0 0 \n",
+ "10989647 0000-0003-2841-9754 1 1 \n",
+ "10989648 0000-0002-3675-6876 0 0 \n",
+ "\n",
+ " given_names family_name biography other_names urls primary_email \\\n",
+ "0 abel elias NaN NaN NaN NaN \n",
+ "1 abelardo ramirez NaN NaN NaN NaN \n",
+ "2 abelardo mancinas NaN NaN NaN NaN \n",
+ "3 abera nigussie NaN NaN NaN NaN \n",
+ "4 abhijeet singh NaN NaN NaN NaN \n",
+ "... ... ... ... ... ... ... \n",
+ "10989644 abeer elbaroudi NaN NaN NaN NaN \n",
+ "10989645 abeer sohrab NaN NaN NaN NaN \n",
+ "10989646 abeer abdelmaksoud NaN NaN NaN NaN \n",
+ "10989647 abeer al-ghazali NaN NaN NaN NaN \n",
+ "10989648 abegail palos-simbre NaN [gail] NaN NaN \n",
+ "\n",
+ " other_emails ... activation_date \\\n",
+ "0 NaN ... 2020-09-16t16:51:54.155z \n",
+ "1 NaN ... 2017-05-10t19:28:13.217z \n",
+ "2 NaN ... 2018-10-15t21:46:52.162z \n",
+ "3 NaN ... 2020-09-23t08:36:17.451z \n",
+ "4 NaN ... 2018-05-01t22:43:17.407z \n",
+ "... ... ... ... \n",
+ "10989644 NaN ... 2020-02-06t15:04:42.485z \n",
+ "10989645 NaN ... 2020-05-12t22:39:26.356z \n",
+ "10989646 NaN ... 2019-12-19t23:09:12.579z \n",
+ "10989647 NaN ... 2019-06-02t18:35:32.973z \n",
+ "10989648 NaN ... 2017-02-10t16:38:52.988z \n",
+ "\n",
+ " last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n",
+ "0 2020-09-16t17:00:08.451z 0 0 0 0 0 \n",
+ "1 2017-05-10t19:28:17.315z 0 0 0 0 0 \n",
+ "2 2020-01-13t03:33:47.645z 0 0 0 0 0 \n",
+ "3 2020-09-23t08:36:17.450z 0 0 0 0 0 \n",
+ "4 2018-10-06t22:21:54.024z 0 0 0 0 0 \n",
+ "... ... ... ... ... ... ... \n",
+ "10989644 2020-02-06t15:16:45.537z 0 0 0 0 0 \n",
+ "10989645 2020-05-12t22:41:45.239z 0 0 0 0 0 \n",
+ "10989646 2019-12-19t23:09:12.798z 0 0 0 0 0 \n",
+ "10989647 2019-08-05t14:54:41.796z 2 0 0 2 1 \n",
+ "10989648 2019-12-11t01:37:15.405z 0 0 0 0 0 \n",
+ "\n",
+ " ext_works_source n_ext_work_source authoritative \n",
+ "0 [] 0 False \n",
+ "1 [] 0 False \n",
+ "2 [] 0 False \n",
+ "3 [] 0 False \n",
+ "4 [] 0 False \n",
+ "... ... ... ... \n",
+ "10989644 [] 0 False \n",
+ "10989645 [] 0 False \n",
+ "10989646 [] 0 False \n",
+ "10989647 [crossref metadata search] 1 True \n",
+ "10989648 [] 0 False \n",
+ "\n",
+ "[10989649 rows x 26 columns]"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "veterinary-phrase",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "plotlyServerURL": "https://plot.ly"
+ },
+ "data": [
+ {
+ "coloraxis": "coloraxis",
+ "hovertemplate": "x: %{x}
y: %{y}
color: %{z}",
+ "name": "0",
+ "type": "heatmap",
+ "x": [
+ "verified_email",
+ "verified_primary_email",
+ "n_works",
+ "n_doi",
+ "n_arxiv",
+ "n_pmc",
+ "n_other_pids",
+ "label",
+ "n_ext_work_source",
+ "authoritative"
+ ],
+ "xaxis": "x",
+ "y": [
+ "verified_email",
+ "verified_primary_email",
+ "n_works",
+ "n_doi",
+ "n_arxiv",
+ "n_pmc",
+ "n_other_pids",
+ "label",
+ "n_ext_work_source",
+ "authoritative"
+ ],
+ "yaxis": "y",
+ "z": [
+ [
+ 1,
+ 0.9649829131837351,
+ 0.07899833525810977,
+ 0.07259719921935899,
+ 0.006461363868256276,
+ 0.030614701011724168,
+ 0.06062464201233044,
+ 0.1531839773366329,
+ 0.1919719557229596,
+ 0.21531668352175948
+ ],
+ [
+ 0.9649829131837351,
+ 1,
+ 0.08183974046701105,
+ 0.07518160639621922,
+ 0.006686059029180166,
+ 0.03171235345994569,
+ 0.06277678931007252,
+ 0.15995695182918668,
+ 0.1981210698185993,
+ 0.22184413814951587
+ ],
+ [
+ 0.07899833525810977,
+ 0.08183974046701105,
+ 1,
+ 0.9378726254396619,
+ 0.31262992500470826,
+ 0.3510856389397645,
+ 0.8353346326814892,
+ 0.22974076078503264,
+ 0.42502019390055656,
+ 0.2990392382833506
+ ],
+ [
+ 0.07259719921935899,
+ 0.07518160639621922,
+ 0.9378726254396619,
+ 1,
+ 0.35605399617713956,
+ 0.3624050122938356,
+ 0.801819617534692,
+ 0.2133388352039022,
+ 0.41375193880464456,
+ 0.28780401348168333
+ ],
+ [
+ 0.006461363868256276,
+ 0.006686059029180166,
+ 0.31262992500470826,
+ 0.35605399617713956,
+ 1,
+ 0.0009072282179230607,
+ 0.2420914875525837,
+ 0.01939797095250517,
+ 0.021262173261030495,
+ 0.02440100048344857
+ ],
+ [
+ 0.030614701011724168,
+ 0.03171235345994569,
+ 0.3510856389397645,
+ 0.3624050122938356,
+ 0.0009072282179230607,
+ 1,
+ 0.2570742999530638,
+ 0.08736856703205036,
+ 0.16873991088778023,
+ 0.11447380021013033
+ ],
+ [
+ 0.06062464201233044,
+ 0.06277678931007252,
+ 0.8353346326814892,
+ 0.801819617534692,
+ 0.2420914875525837,
+ 0.2570742999530638,
+ 1,
+ 0.17528852589870983,
+ 0.3572799642364996,
+ 0.24303586233733107
+ ],
+ [
+ 0.1531839773366329,
+ 0.15995695182918668,
+ 0.22974076078503264,
+ 0.2133388352039022,
+ 0.01939797095250517,
+ 0.08736856703205036,
+ 0.17528852589870983,
+ 1,
+ 0.49221037696497033,
+ 0.5245689815824116
+ ],
+ [
+ 0.1919719557229596,
+ 0.1981210698185993,
+ 0.42502019390055656,
+ 0.41375193880464456,
+ 0.021262173261030495,
+ 0.16873991088778023,
+ 0.3572799642364996,
+ 0.49221037696497033,
+ 1,
+ 0.8380242299586107
+ ],
+ [
+ 0.21531668352175948,
+ 0.22184413814951587,
+ 0.2990392382833506,
+ 0.28780401348168333,
+ 0.02440100048344857,
+ 0.11447380021013033,
+ 0.24303586233733107,
+ 0.5245689815824116,
+ 0.8380242299586107,
+ 1
+ ]
+ ]
+ }
+ ],
+ "layout": {
+ "coloraxis": {
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "margin": {
+ "t": 60
+ },
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "xaxis": {
+ "anchor": "y",
+ "constrain": "domain",
+ "domain": [
+ 0,
+ 1
+ ],
+ "scaleanchor": "y"
+ },
+ "yaxis": {
+ "anchor": "x",
+ "autorange": "reversed",
+ "constrain": "domain",
+ "domain": [
+ 0,
+ 1
+ ]
+ }
+ }
+ },
+ "text/html": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "fig = px.imshow(df.fillna(0).corr())\n",
+ "fig.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "outer-egyptian",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}