{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "subsequent-cornell", "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import glob\n", "import os\n", "\n", "import pandas as pd\n", "import ast\n", "import tldextract\n", "import numpy\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "init_notebook_mode(connected=True)\n", "TOP_N = 0\n", "TOP_RANGE = [0, 0]\n", "\n", "\n", "def set_top_n(n):\n", "    \"\"\"Set the module-level TOP_N to n and TOP_RANGE to [-0.5, n - 0.5].\n", "\n", "    NOTE(review): TOP_RANGE looks like an axis range framing n bars -- confirm where it is used.\n", "    \"\"\"\n", "    global TOP_N, TOP_RANGE\n", "    TOP_N = n\n", "    TOP_RANGE = [-.5, n - 1 + .5]" ] }, { "cell_type": "code", "execution_count": 2, "id": "hydraulic-baker", "metadata": {}, "outputs": [], "source": [ "# Directory holding the pickled dataset shards. Set the FAKE_ORCID_DATA_DIR environment\n", "# variable to point elsewhere instead of editing this machine-specific default.\n", "DATA_DIR = os.environ.get(\n", "    'FAKE_ORCID_DATA_DIR',\n", "    '/Users/miriam.baglioni/Develop/Gitea/fake-orcid-analysis-v2/fake-orcid-analysis/data/processed',\n", ")\n", "# glob returns matches in arbitrary order; sort so the concatenated frame is reproducible.\n", "parts = sorted(glob.glob(os.path.join(DATA_DIR, 'dataset.pkl.*')))\n", "assert parts, f'no dataset.pkl.* shards found in {DATA_DIR}'" ] }, { "cell_type": "code", "execution_count": 3, "id": "lesbian-routine", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emails...employmentn_worksworks_sourceactivation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabel
100000000000-0002-7790-048310abeleliasNaNNaNNaNNaNNaN...NaN0NaN2020-09-16t16:51:54.155z2020-09-16t17:00:08.451z00000
100000010000-0001-6368-053100abelardoramirezNaNNaNNaNNaNNaN...NaN0NaN2017-05-10t19:28:13.217z2017-05-10t19:28:17.315z00000
100000020000-0001-8149-490011abelardomancinasNaNNaNNaNNaNNaN...[[profesor investigador, instituto tecnológico...0NaN2018-10-15t21:46:52.162z2020-01-13t03:33:47.645z00000
100000030000-0002-8684-242200aberanigussieNaNNaNNaNNaNNaN...NaN0NaN2020-09-23t08:36:17.451z2020-09-23t08:36:17.450z00000
100000040000-0003-4814-787211abhijeetsinghNaNNaNNaNNaNNaN...NaN0NaN2018-05-01t22:43:17.407z2018-10-06t22:21:54.024z00000
\n", "

5 rows × 23 columns

\n", "
" ], "text/plain": [ " orcid verified_email verified_primary_email \\\n", "10000000 0000-0002-7790-0483 1 0 \n", "10000001 0000-0001-6368-0531 0 0 \n", "10000002 0000-0001-8149-4900 1 1 \n", "10000003 0000-0002-8684-2422 0 0 \n", "10000004 0000-0003-4814-7872 1 1 \n", "\n", " given_names family_name biography other_names urls primary_email \\\n", "10000000 abel elias NaN NaN NaN NaN \n", "10000001 abelardo ramirez NaN NaN NaN NaN \n", "10000002 abelardo mancinas NaN NaN NaN NaN \n", "10000003 abera nigussie NaN NaN NaN NaN \n", "10000004 abhijeet singh NaN NaN NaN NaN \n", "\n", " other_emails ... employment \\\n", "10000000 NaN ... NaN \n", "10000001 NaN ... NaN \n", "10000002 NaN ... [[profesor investigador, instituto tecnológico... \n", "10000003 NaN ... NaN \n", "10000004 NaN ... NaN \n", "\n", " n_works works_source activation_date \\\n", "10000000 0 NaN 2020-09-16t16:51:54.155z \n", "10000001 0 NaN 2017-05-10t19:28:13.217z \n", "10000002 0 NaN 2018-10-15t21:46:52.162z \n", "10000003 0 NaN 2020-09-23t08:36:17.451z \n", "10000004 0 NaN 2018-05-01t22:43:17.407z \n", "\n", " last_update_date n_doi n_arxiv n_pmc n_other_pids label \n", "10000000 2020-09-16t17:00:08.451z 0 0 0 0 0 \n", "10000001 2017-05-10t19:28:17.315z 0 0 0 0 0 \n", "10000002 2020-01-13t03:33:47.645z 0 0 0 0 0 \n", "10000003 2020-09-23t08:36:17.450z 0 0 0 0 0 \n", "10000004 2018-10-06t22:21:54.024z 0 0 0 0 0 \n", "\n", "[5 rows x 23 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# NOTE: read_pickle can execute arbitrary code while loading; only read shards you produced yourself.\n", "df = pd.concat((pd.read_pickle(part) for part in parts))\n", "df.head(5)" ] }, { "cell_type": "code", "execution_count": 4, "id": "olympic-missile", "metadata": {}, "outputs": [], "source": [ "def remove_own_source(lst, given, family):\n", "    \"\"\"Return the entries of lst that do not contain the profile owner's own name.\n", "\n", "    A source is dropped when the given name or the family name occurs in it as a\n", "    case-insensitive substring; every other string entry is kept, in order.\n", "\n", "    lst: list of source names; any non-list value (e.g. NaN) yields [].\n", "    given, family: name strings; a missing (NaN/None) or blank name is simply not\n", "        used for matching instead of discarding every source.\n", "    \"\"\"\n", "    if not isinstance(lst, list):\n", "        return []\n", "    # '' is a substring of every string, so blank names must not take part in matching.\n", "    own_names = [name.lower() for name in (given, family) if isinstance(name, str) and name.strip()]\n", "    res = []\n", "    for ws in lst:\n", "        if not isinstance(ws, str):\n", "            continue  # NaN/None entries have no .lower(); they cannot name a source\n", "        source = ws.lower()\n", "        if not any(name in source for name in own_names):\n", "            res.append(ws)\n", "    return res" ] }, 
{ "cell_type": "code", "execution_count": 5, "id": "informational-carrier", "metadata": {}, "outputs": [], "source": [ "df['ext_works_source'] = df.apply(lambda x: remove_own_source(x['works_source'], x['given_names'], x['family_name']), axis=1)" ] }, { "cell_type": "code", "execution_count": 6, "id": "hydraulic-pharmaceutical", "metadata": {}, "outputs": [], "source": [ "df['n_ext_work_source'] = df.ext_works_source.str.len()" ] }, { "cell_type": "code", "execution_count": 7, "id": "tropical-stockholm", "metadata": {}, "outputs": [], "source": [ "exploded_external_sources = df[df['ext_works_source'].str.len() > 0][['orcid','ext_works_source']].explode('ext_works_source').reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 8, "id": "enhanced-blanket", "metadata": {}, "outputs": [], "source": [ "grouped_ext_sources = exploded_external_sources.groupby('ext_works_source').count().sort_values('orcid', ascending=False).reset_index()\n" ] }, { "cell_type": "code", "execution_count": 44, "id": "black-congo", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "crossref", "scopus - elsevier", "crossref metadata search", "multidisciplinary digital publishing institute", "europe pubmed central", "researcherid", "publons", "ciênciavitae", "base - bielefeld academic search engine", "datacite", "redalyc", "mla international bibliography", "deutsche nationalbibliothek (dnb)", "nasa astrophysics data system", "national information processing institute ", "f1000", "inspire-hep", "university of helsinki", "hal", "igi global", "airiti", "university of copenhagen", "universidade federal de uberlândia", "aarhus university", "universidad del país vasco", "university of manchester - pure", "kings college london", "university of southern denmark", "wellcome
open research", "macquarie university" ], "y": [ 1460841, 902231, 297684, 281664, 181605, 158148, 39786, 32315, 20699, 16107, 9640, 8059, 7855, 7403, 6509, 5221, 4872, 4152, 4136, 3833, 3725, 3127, 2718, 2311, 2271, 2227, 2199, 2185, 2113, 2053 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 
0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": 
"scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 
0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": 
"white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Top 30 works_source" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Bar chart of the 30 most frequent external sources\n", "# (grouped_ext_sources is already sorted by count, descending).\n", "top_sources = grouped_ext_sources[:30]\n", "data = [go.Bar(x=top_sources.ext_works_source, y=top_sources.orcid)]\n", "layout = go.Layout(\n", "    title='Top 30 works_source',\n", "    xaxis={'tickangle': 45, 'tickfont': {'size': 12}},\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "iplot(fig)" ] }, { "cell_type": "code", "execution_count": 9, "id": "sophisticated-madness", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ext_works_sourceorcid
0crossref1460841
1scopus - elsevier902231
2crossref metadata search297684
3multidisciplinary digital publishing institute281664
4europe pubmed central181605
.........
337uta - oa journal global insight3
338francis crick institute3
339anna3
340santos3
341universitäts- und stadtbibliothek köln3
\n", "

342 rows × 2 columns

\n", "
" ], "text/plain": [ " ext_works_source orcid\n", "0 crossref 1460841\n", "1 scopus - elsevier 902231\n", "2 crossref metadata search 297684\n", "3 multidisciplinary digital publishing institute 281664\n", "4 europe pubmed central 181605\n", ".. ... ...\n", "337 uta - oa journal global insight 3\n", "338 francis crick institute 3\n", "339 anna 3\n", "340 santos 3\n", "341 universitäts- und stadtbibliothek köln 3\n", "\n", "[342 rows x 2 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A source counts as authoritative when it occurs more than twice in exploded_external_sources.\n", "authoritative_sources = grouped_ext_sources[grouped_ext_sources['orcid'] > 2]\n", "authoritative_sources" ] }, { "cell_type": "code", "execution_count": 30, "id": "fifty-translator", "metadata": {}, "outputs": [], "source": [ "exploded_external_sources['authoritative'] = exploded_external_sources.ext_works_source.isin(authoritative_sources['ext_works_source'])" ] }, { "cell_type": "code", "execution_count": 57, "id": "sweet-silicon", "metadata": {}, "outputs": [], "source": [ "# One row per ORCID: True when at least one of its external sources is authoritative.\n", "orcid_authoritative_source = exploded_external_sources.groupby('orcid')['authoritative'].any().reset_index()[['orcid', 'authoritative']]" ] }, { "cell_type": "code", "execution_count": 64, "id": "iraqi-million", "metadata": {}, "outputs": [], "source": [ "# Drop any 'authoritative' column left by a previous run so that re-executing this cell\n", "# does not fail in join() on overlapping column names.\n", "df = (\n", "    df.drop(columns=['authoritative'], errors='ignore')\n", "    .set_index('orcid')\n", "    .join(orcid_authoritative_source.set_index('orcid'))\n", "    .reset_index()\n", ")" ] }, { "cell_type": "code", "execution_count": 65, "id": "current-convergence", "metadata": {}, "outputs": [], "source": [ "# ORCIDs with no external source get NaN from the join: flag them False and store the\n", "# column as a real bool rather than an object column mixing bool and NaN.\n", "df['authoritative'] = df['authoritative'].fillna(False).astype(bool)" ] }, { "cell_type": "code", "execution_count": 66, "id": "median-smith", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orcidverified_emailverified_primary_emailgiven_namesfamily_namebiographyother_namesurlsprimary_emailother_emails...activation_datelast_update_daten_doin_arxivn_pmcn_other_pidslabelext_works_sourcen_ext_work_sourceauthoritative
00000-0002-7790-048310abeleliasNaNNaNNaNNaNNaN...2020-09-16t16:51:54.155z2020-09-16t17:00:08.451z00000[]0False
10000-0001-6368-053100abelardoramirezNaNNaNNaNNaNNaN...2017-05-10t19:28:13.217z2017-05-10t19:28:17.315z00000[]0False
20000-0001-8149-490011abelardomancinasNaNNaNNaNNaNNaN...2018-10-15t21:46:52.162z2020-01-13t03:33:47.645z00000[]0False
30000-0002-8684-242200aberanigussieNaNNaNNaNNaNNaN...2020-09-23t08:36:17.451z2020-09-23t08:36:17.450z00000[]0False
40000-0003-4814-787211abhijeetsinghNaNNaNNaNNaNNaN...2018-05-01t22:43:17.407z2018-10-06t22:21:54.024z00000[]0False
..................................................................
109896440000-0001-7468-988111abeerelbaroudiNaNNaNNaNNaNNaN...2020-02-06t15:04:42.485z2020-02-06t15:16:45.537z00000[]0False
109896450000-0003-0081-428511abeersohrabNaNNaNNaNNaNNaN...2020-05-12t22:39:26.356z2020-05-12t22:41:45.239z00000[]0False
109896460000-0003-2004-345700abeerabdelmaksoudNaNNaNNaNNaNNaN...2019-12-19t23:09:12.579z2019-12-19t23:09:12.798z00000[]0False
109896470000-0003-2841-975411abeeral-ghazaliNaNNaNNaNNaNNaN...2019-06-02t18:35:32.973z2019-08-05t14:54:41.796z20021[crossref metadata search]1True
109896480000-0002-3675-687600abegailpalos-simbreNaN[gail]NaNNaNNaN...2017-02-10t16:38:52.988z2019-12-11t01:37:15.405z00000[]0False
\n", "

10989649 rows × 26 columns

\n", "
" ], "text/plain": [ " orcid verified_email verified_primary_email \\\n", "0 0000-0002-7790-0483 1 0 \n", "1 0000-0001-6368-0531 0 0 \n", "2 0000-0001-8149-4900 1 1 \n", "3 0000-0002-8684-2422 0 0 \n", "4 0000-0003-4814-7872 1 1 \n", "... ... ... ... \n", "10989644 0000-0001-7468-9881 1 1 \n", "10989645 0000-0003-0081-4285 1 1 \n", "10989646 0000-0003-2004-3457 0 0 \n", "10989647 0000-0003-2841-9754 1 1 \n", "10989648 0000-0002-3675-6876 0 0 \n", "\n", " given_names family_name biography other_names urls primary_email \\\n", "0 abel elias NaN NaN NaN NaN \n", "1 abelardo ramirez NaN NaN NaN NaN \n", "2 abelardo mancinas NaN NaN NaN NaN \n", "3 abera nigussie NaN NaN NaN NaN \n", "4 abhijeet singh NaN NaN NaN NaN \n", "... ... ... ... ... ... ... \n", "10989644 abeer elbaroudi NaN NaN NaN NaN \n", "10989645 abeer sohrab NaN NaN NaN NaN \n", "10989646 abeer abdelmaksoud NaN NaN NaN NaN \n", "10989647 abeer al-ghazali NaN NaN NaN NaN \n", "10989648 abegail palos-simbre NaN [gail] NaN NaN \n", "\n", " other_emails ... activation_date \\\n", "0 NaN ... 2020-09-16t16:51:54.155z \n", "1 NaN ... 2017-05-10t19:28:13.217z \n", "2 NaN ... 2018-10-15t21:46:52.162z \n", "3 NaN ... 2020-09-23t08:36:17.451z \n", "4 NaN ... 2018-05-01t22:43:17.407z \n", "... ... ... ... \n", "10989644 NaN ... 2020-02-06t15:04:42.485z \n", "10989645 NaN ... 2020-05-12t22:39:26.356z \n", "10989646 NaN ... 2019-12-19t23:09:12.579z \n", "10989647 NaN ... 2019-06-02t18:35:32.973z \n", "10989648 NaN ... 2017-02-10t16:38:52.988z \n", "\n", " last_update_date n_doi n_arxiv n_pmc n_other_pids label \\\n", "0 2020-09-16t17:00:08.451z 0 0 0 0 0 \n", "1 2017-05-10t19:28:17.315z 0 0 0 0 0 \n", "2 2020-01-13t03:33:47.645z 0 0 0 0 0 \n", "3 2020-09-23t08:36:17.450z 0 0 0 0 0 \n", "4 2018-10-06t22:21:54.024z 0 0 0 0 0 \n", "... ... ... ... ... ... ... 
\n", "10989644 2020-02-06t15:16:45.537z 0 0 0 0 0 \n", "10989645 2020-05-12t22:41:45.239z 0 0 0 0 0 \n", "10989646 2019-12-19t23:09:12.798z 0 0 0 0 0 \n", "10989647 2019-08-05t14:54:41.796z 2 0 0 2 1 \n", "10989648 2019-12-11t01:37:15.405z 0 0 0 0 0 \n", "\n", " ext_works_source n_ext_work_source authoritative \n", "0 [] 0 False \n", "1 [] 0 False \n", "2 [] 0 False \n", "3 [] 0 False \n", "4 [] 0 False \n", "... ... ... ... \n", "10989644 [] 0 False \n", "10989645 [] 0 False \n", "10989646 [] 0 False \n", "10989647 [crossref metadata search] 1 True \n", "10989648 [] 0 False \n", "\n", "[10989649 rows x 26 columns]" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Show df with the derived ext_works_source, n_ext_work_source and authoritative columns\n", "# added by the preceding cells.\n", "df" ] }, { "cell_type": "code", "execution_count": 67, "id": "veterinary-phrase", "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "coloraxis": "coloraxis", "hovertemplate": "x: %{x}
y: %{y}
color: %{z}", "name": "0", "type": "heatmap", "x": [ "verified_email", "verified_primary_email", "n_works", "n_doi", "n_arxiv", "n_pmc", "n_other_pids", "label", "n_ext_work_source", "authoritative" ], "xaxis": "x", "y": [ "verified_email", "verified_primary_email", "n_works", "n_doi", "n_arxiv", "n_pmc", "n_other_pids", "label", "n_ext_work_source", "authoritative" ], "yaxis": "y", "z": [ [ 1, 0.9649829131837351, 0.07899833525810977, 0.07259719921935899, 0.006461363868256276, 0.030614701011724168, 0.06062464201233044, 0.1531839773366329, 0.1919719557229596, 0.21531668352175948 ], [ 0.9649829131837351, 1, 0.08183974046701105, 0.07518160639621922, 0.006686059029180166, 0.03171235345994569, 0.06277678931007252, 0.15995695182918668, 0.1981210698185993, 0.22184413814951587 ], [ 0.07899833525810977, 0.08183974046701105, 1, 0.9378726254396619, 0.31262992500470826, 0.3510856389397645, 0.8353346326814892, 0.22974076078503264, 0.42502019390055656, 0.2990392382833506 ], [ 0.07259719921935899, 0.07518160639621922, 0.9378726254396619, 1, 0.35605399617713956, 0.3624050122938356, 0.801819617534692, 0.2133388352039022, 0.41375193880464456, 0.28780401348168333 ], [ 0.006461363868256276, 0.006686059029180166, 0.31262992500470826, 0.35605399617713956, 1, 0.0009072282179230607, 0.2420914875525837, 0.01939797095250517, 0.021262173261030495, 0.02440100048344857 ], [ 0.030614701011724168, 0.03171235345994569, 0.3510856389397645, 0.3624050122938356, 0.0009072282179230607, 1, 0.2570742999530638, 0.08736856703205036, 0.16873991088778023, 0.11447380021013033 ], [ 0.06062464201233044, 0.06277678931007252, 0.8353346326814892, 0.801819617534692, 0.2420914875525837, 0.2570742999530638, 1, 0.17528852589870983, 0.3572799642364996, 0.24303586233733107 ], [ 0.1531839773366329, 0.15995695182918668, 0.22974076078503264, 0.2133388352039022, 0.01939797095250517, 0.08736856703205036, 0.17528852589870983, 1, 0.49221037696497033, 0.5245689815824116 ], [ 0.1919719557229596, 0.1981210698185993, 
0.42502019390055656, 0.41375193880464456, 0.021262173261030495, 0.16873991088778023, 0.3572799642364996, 0.49221037696497033, 1, 0.8380242299586107 ], [ 0.21531668352175948, 0.22184413814951587, 0.2990392382833506, 0.28780401348168333, 0.02440100048344857, 0.11447380021013033, 0.24303586233733107, 0.5245689815824116, 0.8380242299586107, 1 ] ] } ], "layout": { "coloraxis": { "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, 
"ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": 
"pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, 
"#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": 
"white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "xaxis": { "anchor": "y", "constrain": "domain", "domain": [ 0, 1 ], "scaleanchor": "y" }, "yaxis": { "anchor": "x", "autorange": "reversed", "constrain": "domain", "domain": [ 0, 1 ] } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = px.imshow(df.fillna(0).corr())\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": null, "id": "outer-egyptian", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 5 }