diff --git a/notebooks/01-Exploration.ipynb b/notebooks/01-Exploration.ipynb index a882c66..d3e2efa 100644 --- a/notebooks/01-Exploration.ipynb +++ b/notebooks/01-Exploration.ipynb @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 86, "metadata": {}, "outputs": [], "source": [ @@ -562,74 +562,74 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.other_names.notna(), 'other_names'] = df.loc[df.other_names.notna(), 'other_names'].apply(lambda x: ast.literal_eval(x))" + "df['other_names'] = df[df.other_names.notna()]['other_names'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.keywords.notna(), 'keywords'] = df.loc[df.keywords.notna(), 'keywords'].apply(lambda x: ast.literal_eval(x))" + "df['keywords'] = df[df.keywords.notna()]['keywords'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.urls.notna(), 'urls'] = df.loc[df.urls.notna(), 'urls'].apply(lambda x: ast.literal_eval(x))" + "df['urls'] = df[df.urls.notna()]['urls'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.other_emails.notna(), 'other_emails'] = df.loc[df.other_emails.notna(), 'other_emails'].apply(lambda x: ast.literal_eval(x))" + "df['other_emails'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 94, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.education.notna(), 'education'] = df.loc[df.education.notna(), 'education'].apply(lambda x: ast.literal_eval(x))" + "df['education'] = df[df.education.notna()]['education'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.employment.notna(), 'employment'] = df.loc[df.employment.notna(), 'employment'].apply(lambda x: ast.literal_eval(x))" + "df['employment'] = df[df.employment.notna()]['employment'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 96, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.external_ids.notna(), 'external_ids'] = df.loc[df.external_ids.notna(), 'external_ids'].apply(lambda x: ast.literal_eval(x))" + "df['external_ids'] = df[df.external_ids.notna()]['external_ids'].apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ - "df.loc[df.works_source.notna(), 'works_source'] = df.loc[df.works_source.notna(), 'works_source'].apply(lambda x: ast.literal_eval(x))" + "df['works_source'] = df[df.works_source.notna()]['works_source'].apply(lambda x: ast.literal_eval(x))" ] }, { @@ -9100,16 +9100,16 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ - "df.loc[df['external_ids'].notna(), 'n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()" + "df['n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 85, "metadata": {}, "outputs": [ { @@ -9126,7 +9126,7 @@ "Name: n_ids, dtype: float64" ] }, - "execution_count": 53, + "execution_count": 85, "metadata": {}, "output_type": "execute_result" } @@ -9489,362 +9489,6 @@ "ids[ids.provider.notna()].head()" ] }, - { - "cell_type": "code", - "execution_count": 67, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
orcidexternal_ids
provider
Scopus Author ID10151751015175
ResearcherID543545543545
Loop profile114316114316
Ciência ID3387033870
Researcher Name Resolver ID78107810
中国科学家在线47944794
ISNI30543054
GND28252825
Pitt ID26722672
Technical University of Denmark CWIS24862486
Sciprofile24412441
Researcher ID14171417
ID Dialnet11671167
Digital author ID10791079
Scopus Author ID:10761076
AuthenticusID847847
HKU ResearcherPage740740
UOW Scholars644644
CTI Vitae581581
Scopus Author ID:549549
HKUST Profile521521
Scopus author ID501501
Chalmers ID430430
Scopus ID232232
iAuthor212212
Google Scholar200200
AuthID175175
DAI153153
US EPA VIVO146146
Digital Author ID (DAI)135135
Scopus ID127127
Authenticus8282
Smithsonian Profiles6161
GitHub5151
eScientist4949
VIVO Cornell4646
ResearcherID:3939
Digital Author ID3535
Digital author ID (DAI)3434
ID Dialnet:77
Dialnet ID66
KAKEN55
UNE Researcher ID44
ResearcherID:33
ORCID22
Custom11
ORCID iD11
ScienceOpen11
ResearcherId11
Profile system identifier11
\n", - "
" - ], - "text/plain": [ - " orcid external_ids\n", - "provider \n", - "Scopus Author ID 1015175 1015175\n", - "ResearcherID 543545 543545\n", - "Loop profile 114316 114316\n", - "Ciência ID 33870 33870\n", - "Researcher Name Resolver ID 7810 7810\n", - "中国科学家在线 4794 4794\n", - "ISNI 3054 3054\n", - "GND 2825 2825\n", - "Pitt ID 2672 2672\n", - "Technical University of Denmark CWIS 2486 2486\n", - "Sciprofile 2441 2441\n", - "Researcher ID 1417 1417\n", - "ID Dialnet 1167 1167\n", - "Digital author ID 1079 1079\n", - "Scopus Author ID: 1076 1076\n", - "AuthenticusID 847 847\n", - "HKU ResearcherPage 740 740\n", - "UOW Scholars 644 644\n", - "CTI Vitae 581 581\n", - "Scopus Author ID: 549 549\n", - "HKUST Profile 521 521\n", - "Scopus author ID 501 501\n", - "Chalmers ID 430 430\n", - "Scopus ID 232 232\n", - "iAuthor 212 212\n", - "Google Scholar 200 200\n", - "AuthID 175 175\n", - "DAI 153 153\n", - "US EPA VIVO 146 146\n", - "Digital Author ID (DAI) 135 135\n", - "Scopus ID 127 127\n", - "Authenticus 82 82\n", - "Smithsonian Profiles 61 61\n", - "GitHub 51 51\n", - "eScientist 49 49\n", - "VIVO Cornell 46 46\n", - "ResearcherID: 39 39\n", - "Digital Author ID 35 35\n", - "Digital author ID (DAI) 34 34\n", - "ID Dialnet: 7 7\n", - "Dialnet ID 6 6\n", - "KAKEN 5 5\n", - "UNE Researcher ID 4 4\n", - "ResearcherID: 3 3\n", - "ORCID 2 2\n", - "Custom 1 1\n", - "ORCID iD 1 1\n", - "ScienceOpen 1 1\n", - "ResearcherId 1 1\n", - "Profile system identifier 1 1" - ] - }, - "execution_count": 67, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ids.groupby('provider').count().sort_values('orcid', ascending=False)" - ] - }, { "cell_type": "code", "execution_count": 69, @@ -10868,32 +10512,1225 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "## Keywords" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 75, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df['n_keywords'] = df.keywords.str.len()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 80, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
orcidn_keywords
16813100000-0002-0673-0341154.0
77176990000-0002-7060-4112141.0
45976740000-0002-6075-3501140.0
20665800000-0002-4071-0301118.0
35310300000-0002-9638-8091115.0
.........
107470350000-0003-4998-1551NaN
107470360000-0003-4998-4111NaN
107470370000-0003-4998-6045NaN
107470380000-0003-4998-8868NaN
107470390000-0003-4999-7916NaN
\n", + "

10744621 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " orcid n_keywords\n", + "1681310 0000-0002-0673-0341 154.0\n", + "7717699 0000-0002-7060-4112 141.0\n", + "4597674 0000-0002-6075-3501 140.0\n", + "2066580 0000-0002-4071-0301 118.0\n", + "3531030 0000-0002-9638-8091 115.0\n", + "... ... ...\n", + "10747035 0000-0003-4998-1551 NaN\n", + "10747036 0000-0003-4998-4111 NaN\n", + "10747037 0000-0003-4998-6045 NaN\n", + "10747038 0000-0003-4998-8868 NaN\n", + "10747039 0000-0003-4999-7916 NaN\n", + "\n", + "[10744621 rows x 2 columns]" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 83, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "linkText": "Export to plot.ly", + "plotlyServerURL": "https://plot.ly", + "showLink": false + }, + "data": [ + { + "type": "bar", + "x": [ + "0000-0002-0673-0341", + "0000-0002-7060-4112", + "0000-0002-6075-3501", + "0000-0002-4071-0301", + "0000-0002-9638-8091", + "0000-0002-4235-4259", + "0000-0001-9462-5666", + "0000-0003-0076-6287", + "0000-0002-1878-9762", + "0000-0001-6537-7683", + "0000-0001-6307-6027", + "0000-0003-2273-9888", + "0000-0003-1799-0971", + "0000-0001-5287-1949", + "0000-0002-0937-7061", + "0000-0001-9715-9357", + "0000-0001-5696-1052", + "0000-0003-2998-5520", + "0000-0001-5869-2204", + "0000-0002-0156-3580", + "0000-0002-9625-6742", + "0000-0002-8401-8018", + "0000-0001-9985-1697", + "0000-0003-4246-8579", + "0000-0002-7710-0355", + "0000-0002-8083-7382", + "0000-0001-7654-5013", + "0000-0001-6939-3859", + "0000-0002-3061-3364", + "0000-0003-2509-2549", + "0000-0002-0463-0048", + "0000-0001-5230-715X", + "0000-0001-5458-7167", + "0000-0001-9336-6850", + "0000-0003-0209-180X", + "0000-0002-8227-5387", + "0000-0002-9381-2264", + "0000-0003-3340-6413", + "0000-0003-3584-6834", + "0000-0002-2935-1934", + "0000-0002-8644-8396", + "0000-0002-8659-6321", + "0000-0002-3123-3021", + "0000-0001-5637-1124", + "0000-0001-5167-7466", + "0000-0002-8449-2211", + "0000-0003-2532-2906", + "0000-0002-3532-043X", + "0000-0002-2683-4527", + "0000-0003-4505-3678", + "0000-0002-6347-9464", + "0000-0003-4608-3844", + "0000-0003-4374-6374", + "0000-0003-4511-7942", + "0000-0002-1103-9651", + "0000-0003-3720-1183", + "0000-0001-9280-6017", + "0000-0003-4673-1063", + "0000-0001-9586-0780", + "0000-0002-5539-1761", + "0000-0003-2550-1859", + "0000-0002-8499-1045", + "0000-0003-2218-1343", + "0000-0002-5306-7781", + "0000-0003-1863-0265", + "0000-0002-8072-1152", + "0000-0003-3342-6123", + "0000-0002-2252-672X", + "0000-0002-3907-3552", + "0000-0001-6861-9561", + "0000-0002-3597-3350", + "0000-0002-5274-7742", + "0000-0002-3186-8860", + "0000-0001-8689-185X", + "0000-0002-6282-0640", + "0000-0003-3343-5660", + "0000-0001-7133-7848", + "0000-0003-4486-2684", + "0000-0002-9014-2090", + "0000-0001-6843-9325", + "0000-0003-0097-4182", + "0000-0003-1245-7705", + "0000-0002-4432-3448", + "0000-0001-7857-4133", + "0000-0002-1294-2156", + "0000-0001-8445-412X", + "0000-0002-1411-3028", + "0000-0003-3387-3193", + "0000-0002-1545-7818", + "0000-0002-9125-6022", + "0000-0002-3898-9542", + "0000-0002-3866-6460", + "0000-0003-4283-2895", + "0000-0003-4153-6779", + "0000-0002-4598-2891", + "0000-0002-0211-7195", + "0000-0002-1770-9660", + "0000-0002-1960-5857", + "0000-0003-2640-6757", + "0000-0002-5432-9595" + ], + "y": [ + 154, + 141, + 140, + 118, + 115, + 104, + 98, + 94, + 92, + 91, + 88, + 86, + 84, + 82, + 78, + 77, + 76, + 75, + 74, + 73, + 71, + 70, + 69, + 66, + 64, + 62, + 61, + 60, + 58, + 57, + 56, + 54, + 53, + 53, + 52, + 51, + 51, + 51, + 51, + 50, + 50, + 50, + 50, + 49, + 49, + 49, + 48, + 48, + 48, + 48, + 48, + 48, + 47, + 47, + 46, + 46, + 46, + 45, + 45, + 44, + 44, + 44, + 44, + 44, + 44, + 43, + 43, + 42, + 42, + 42, + 42, + 42, + 42, + 41, + 41, + 41, + 41, + 41, + 41, + 41, + 40, + 40, + 40, + 40, + 40, + 40, + 39, + 39, + 39, + 39, + 39, + 39, + 39, + 39, + 39, + 39, + 38, + 38, + 38, + 38 + ] + } + ], + "layout": { + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "title": { + "text": "Keywords provided" + }, + "xaxis": { + "tickangle": 45, + "tickfont": { + "size": 12 + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data = [\n", + " go.Bar(\n", + " x=df.sort_values('n_keywords', ascending=False)['orcid'][:100],\n", + " y=df.sort_values('n_keywords', ascending=False)['n_keywords'][:100]\n", + " )\n", + "]\n", + "\n", + "layout = go.Layout(\n", + " title='Keywords provided',\n", + " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", + ")\n", + "fig = go.Figure(data=data, layout=layout)\n", + "plotly.offline.iplot(fig)" + ] }, { "cell_type": "markdown", @@ -10904,7 +11741,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 81, "metadata": {}, "outputs": [ { @@ -10926,7 +11763,8 @@ "n_works", "n_emails", "n_urls", - "n_ids" + "n_ids", + "n_keywords" ], "xaxis": "x", "y": [ @@ -10936,11 +11774,13 @@ "n_works", "n_emails", "n_urls", - "n_ids" + "n_ids", + "n_keywords" ], "yaxis": "y", "z": [ [ + null, null, null, null, @@ -10956,7 +11796,8 @@ 0.06481728326324665, 0.011268135706995959, 0.016235518285109687, - 0.08833871138587861 + 0.08833871138587861, + 0.017045184883982135 ], [ null, @@ -10965,7 +11806,8 @@ 0.0659649633755603, 0.009337798958686118, 0.0168692743777146, - 0.08953190210488794 + 0.08953190210488794, + 0.017626925761491493 ], [ null, @@ -10974,7 +11816,8 @@ 1, 0.05241643710974057, 0.05179828612278866, - 0.23927720131732308 + 0.23927720131732308, + 0.030960689715636 ], [ null, @@ -10983,7 +11826,8 @@ 0.05241643710974057, 1, 0.11119920854300894, - 0.04817263453943147 + 0.04817263453943147, + 0.04157758861961359 ], [ null, @@ -10992,7 +11836,8 @@ 0.05179828612278866, 0.11119920854300894, 1, - 0.06925708918455128 + 0.06925708918455128, + 0.15926017909633472 ], [ null, @@ -11001,6 +11846,17 @@ 0.23927720131732308, 0.04817263453943147, 0.06925708918455128, + 1, + 0.06320236481237387 + ], + [ + null, + 0.017045184883982135, + 0.017626925761491493, + 0.030960689715636, + 0.04157758861961359, + 0.15926017909633472, + 0.06320236481237387, 1 ] ] @@ -11881,9 +12737,9 @@ } }, "text/html": [ - "