diff --git a/notebooks/01-Exploration.ipynb b/notebooks/01-Exploration.ipynb
index a882c66..d3e2efa 100644
--- a/notebooks/01-Exploration.ipynb
+++ b/notebooks/01-Exploration.ipynb
@@ -130,7 +130,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
@@ -562,74 +562,74 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.other_names.notna(), 'other_names'] = df.loc[df.other_names.notna(), 'other_names'].apply(lambda x: ast.literal_eval(x))"
+ "df['other_names'] = df[df.other_names.notna()]['other_names'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.keywords.notna(), 'keywords'] = df.loc[df.keywords.notna(), 'keywords'].apply(lambda x: ast.literal_eval(x))"
+ "df['keywords'] = df[df.keywords.notna()]['keywords'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.urls.notna(), 'urls'] = df.loc[df.urls.notna(), 'urls'].apply(lambda x: ast.literal_eval(x))"
+ "df['urls'] = df[df.urls.notna()]['urls'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 93,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.other_emails.notna(), 'other_emails'] = df.loc[df.other_emails.notna(), 'other_emails'].apply(lambda x: ast.literal_eval(x))"
+ "df['other_emails'] = df[df.other_emails.notna()]['other_emails'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 94,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.education.notna(), 'education'] = df.loc[df.education.notna(), 'education'].apply(lambda x: ast.literal_eval(x))"
+ "df['education'] = df[df.education.notna()]['education'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.employment.notna(), 'employment'] = df.loc[df.employment.notna(), 'employment'].apply(lambda x: ast.literal_eval(x))"
+ "df['employment'] = df[df.employment.notna()]['employment'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.external_ids.notna(), 'external_ids'] = df.loc[df.external_ids.notna(), 'external_ids'].apply(lambda x: ast.literal_eval(x))"
+ "df['external_ids'] = df[df.external_ids.notna()]['external_ids'].apply(lambda x: ast.literal_eval(x))"
]
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df.works_source.notna(), 'works_source'] = df.loc[df.works_source.notna(), 'works_source'].apply(lambda x: ast.literal_eval(x))"
+ "df['works_source'] = df[df.works_source.notna()]['works_source'].apply(lambda x: ast.literal_eval(x))"
]
},
{
@@ -9100,16 +9100,16 @@
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
- "df.loc[df['external_ids'].notna(), 'n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()"
+ "df['n_ids'] = df[df['external_ids'].notna()].external_ids.str.len()"
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 85,
"metadata": {},
"outputs": [
{
@@ -9126,7 +9126,7 @@
"Name: n_ids, dtype: float64"
]
},
- "execution_count": 53,
+ "execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
@@ -9489,362 +9489,6 @@
"ids[ids.provider.notna()].head()"
]
},
- {
- "cell_type": "code",
- "execution_count": 67,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " orcid | \n",
- " external_ids | \n",
- "
\n",
- " \n",
- " provider | \n",
- " | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Scopus Author ID | \n",
- " 1015175 | \n",
- " 1015175 | \n",
- "
\n",
- " \n",
- " ResearcherID | \n",
- " 543545 | \n",
- " 543545 | \n",
- "
\n",
- " \n",
- " Loop profile | \n",
- " 114316 | \n",
- " 114316 | \n",
- "
\n",
- " \n",
- " Ciência ID | \n",
- " 33870 | \n",
- " 33870 | \n",
- "
\n",
- " \n",
- " Researcher Name Resolver ID | \n",
- " 7810 | \n",
- " 7810 | \n",
- "
\n",
- " \n",
- " 中国科学家在线 | \n",
- " 4794 | \n",
- " 4794 | \n",
- "
\n",
- " \n",
- " ISNI | \n",
- " 3054 | \n",
- " 3054 | \n",
- "
\n",
- " \n",
- " GND | \n",
- " 2825 | \n",
- " 2825 | \n",
- "
\n",
- " \n",
- " Pitt ID | \n",
- " 2672 | \n",
- " 2672 | \n",
- "
\n",
- " \n",
- " Technical University of Denmark CWIS | \n",
- " 2486 | \n",
- " 2486 | \n",
- "
\n",
- " \n",
- " Sciprofile | \n",
- " 2441 | \n",
- " 2441 | \n",
- "
\n",
- " \n",
- " Researcher ID | \n",
- " 1417 | \n",
- " 1417 | \n",
- "
\n",
- " \n",
- " ID Dialnet | \n",
- " 1167 | \n",
- " 1167 | \n",
- "
\n",
- " \n",
- " Digital author ID | \n",
- " 1079 | \n",
- " 1079 | \n",
- "
\n",
- " \n",
- " Scopus Author ID: | \n",
- " 1076 | \n",
- " 1076 | \n",
- "
\n",
- " \n",
- " AuthenticusID | \n",
- " 847 | \n",
- " 847 | \n",
- "
\n",
- " \n",
- " HKU ResearcherPage | \n",
- " 740 | \n",
- " 740 | \n",
- "
\n",
- " \n",
- " UOW Scholars | \n",
- " 644 | \n",
- " 644 | \n",
- "
\n",
- " \n",
- " CTI Vitae | \n",
- " 581 | \n",
- " 581 | \n",
- "
\n",
- " \n",
- " Scopus Author ID: | \n",
- " 549 | \n",
- " 549 | \n",
- "
\n",
- " \n",
- " HKUST Profile | \n",
- " 521 | \n",
- " 521 | \n",
- "
\n",
- " \n",
- " Scopus author ID | \n",
- " 501 | \n",
- " 501 | \n",
- "
\n",
- " \n",
- " Chalmers ID | \n",
- " 430 | \n",
- " 430 | \n",
- "
\n",
- " \n",
- " Scopus ID | \n",
- " 232 | \n",
- " 232 | \n",
- "
\n",
- " \n",
- " iAuthor | \n",
- " 212 | \n",
- " 212 | \n",
- "
\n",
- " \n",
- " Google Scholar | \n",
- " 200 | \n",
- " 200 | \n",
- "
\n",
- " \n",
- " AuthID | \n",
- " 175 | \n",
- " 175 | \n",
- "
\n",
- " \n",
- " DAI | \n",
- " 153 | \n",
- " 153 | \n",
- "
\n",
- " \n",
- " US EPA VIVO | \n",
- " 146 | \n",
- " 146 | \n",
- "
\n",
- " \n",
- " Digital Author ID (DAI) | \n",
- " 135 | \n",
- " 135 | \n",
- "
\n",
- " \n",
- " Scopus ID | \n",
- " 127 | \n",
- " 127 | \n",
- "
\n",
- " \n",
- " Authenticus | \n",
- " 82 | \n",
- " 82 | \n",
- "
\n",
- " \n",
- " Smithsonian Profiles | \n",
- " 61 | \n",
- " 61 | \n",
- "
\n",
- " \n",
- " GitHub | \n",
- " 51 | \n",
- " 51 | \n",
- "
\n",
- " \n",
- " eScientist | \n",
- " 49 | \n",
- " 49 | \n",
- "
\n",
- " \n",
- " VIVO Cornell | \n",
- " 46 | \n",
- " 46 | \n",
- "
\n",
- " \n",
- " ResearcherID: | \n",
- " 39 | \n",
- " 39 | \n",
- "
\n",
- " \n",
- " Digital Author ID | \n",
- " 35 | \n",
- " 35 | \n",
- "
\n",
- " \n",
- " Digital author ID (DAI) | \n",
- " 34 | \n",
- " 34 | \n",
- "
\n",
- " \n",
- " ID Dialnet: | \n",
- " 7 | \n",
- " 7 | \n",
- "
\n",
- " \n",
- " Dialnet ID | \n",
- " 6 | \n",
- " 6 | \n",
- "
\n",
- " \n",
- " KAKEN | \n",
- " 5 | \n",
- " 5 | \n",
- "
\n",
- " \n",
- " UNE Researcher ID | \n",
- " 4 | \n",
- " 4 | \n",
- "
\n",
- " \n",
- " ResearcherID: | \n",
- " 3 | \n",
- " 3 | \n",
- "
\n",
- " \n",
- " ORCID | \n",
- " 2 | \n",
- " 2 | \n",
- "
\n",
- " \n",
- " Custom | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " ORCID iD | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " ScienceOpen | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " ResearcherId | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " Profile system identifier | \n",
- " 1 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " orcid external_ids\n",
- "provider \n",
- "Scopus Author ID 1015175 1015175\n",
- "ResearcherID 543545 543545\n",
- "Loop profile 114316 114316\n",
- "Ciência ID 33870 33870\n",
- "Researcher Name Resolver ID 7810 7810\n",
- "中国科学家在线 4794 4794\n",
- "ISNI 3054 3054\n",
- "GND 2825 2825\n",
- "Pitt ID 2672 2672\n",
- "Technical University of Denmark CWIS 2486 2486\n",
- "Sciprofile 2441 2441\n",
- "Researcher ID 1417 1417\n",
- "ID Dialnet 1167 1167\n",
- "Digital author ID 1079 1079\n",
- "Scopus Author ID: 1076 1076\n",
- "AuthenticusID 847 847\n",
- "HKU ResearcherPage 740 740\n",
- "UOW Scholars 644 644\n",
- "CTI Vitae 581 581\n",
- "Scopus Author ID: 549 549\n",
- "HKUST Profile 521 521\n",
- "Scopus author ID 501 501\n",
- "Chalmers ID 430 430\n",
- "Scopus ID 232 232\n",
- "iAuthor 212 212\n",
- "Google Scholar 200 200\n",
- "AuthID 175 175\n",
- "DAI 153 153\n",
- "US EPA VIVO 146 146\n",
- "Digital Author ID (DAI) 135 135\n",
- "Scopus ID 127 127\n",
- "Authenticus 82 82\n",
- "Smithsonian Profiles 61 61\n",
- "GitHub 51 51\n",
- "eScientist 49 49\n",
- "VIVO Cornell 46 46\n",
- "ResearcherID: 39 39\n",
- "Digital Author ID 35 35\n",
- "Digital author ID (DAI) 34 34\n",
- "ID Dialnet: 7 7\n",
- "Dialnet ID 6 6\n",
- "KAKEN 5 5\n",
- "UNE Researcher ID 4 4\n",
- "ResearcherID: 3 3\n",
- "ORCID 2 2\n",
- "Custom 1 1\n",
- "ORCID iD 1 1\n",
- "ScienceOpen 1 1\n",
- "ResearcherId 1 1\n",
- "Profile system identifier 1 1"
- ]
- },
- "execution_count": 67,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "ids.groupby('provider').count().sort_values('orcid', ascending=False)"
- ]
- },
{
"cell_type": "code",
"execution_count": 69,
@@ -10868,32 +10512,1225 @@
]
},
{
- "cell_type": "code",
- "execution_count": null,
+ "cell_type": "markdown",
"metadata": {},
- "outputs": [],
- "source": []
+ "source": [
+ "## Keywords"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 75,
"metadata": {},
"outputs": [],
- "source": []
+ "source": [
+ "df['n_keywords'] = df.keywords.str.len()"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 80,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " orcid | \n",
+ " n_keywords | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1681310 | \n",
+ " 0000-0002-0673-0341 | \n",
+ " 154.0 | \n",
+ "
\n",
+ " \n",
+ " 7717699 | \n",
+ " 0000-0002-7060-4112 | \n",
+ " 141.0 | \n",
+ "
\n",
+ " \n",
+ " 4597674 | \n",
+ " 0000-0002-6075-3501 | \n",
+ " 140.0 | \n",
+ "
\n",
+ " \n",
+ " 2066580 | \n",
+ " 0000-0002-4071-0301 | \n",
+ " 118.0 | \n",
+ "
\n",
+ " \n",
+ " 3531030 | \n",
+ " 0000-0002-9638-8091 | \n",
+ " 115.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 10747035 | \n",
+ " 0000-0003-4998-1551 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10747036 | \n",
+ " 0000-0003-4998-4111 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10747037 | \n",
+ " 0000-0003-4998-6045 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10747038 | \n",
+ " 0000-0003-4998-8868 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10747039 | \n",
+ " 0000-0003-4999-7916 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10744621 rows × 2 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " orcid n_keywords\n",
+ "1681310 0000-0002-0673-0341 154.0\n",
+ "7717699 0000-0002-7060-4112 141.0\n",
+ "4597674 0000-0002-6075-3501 140.0\n",
+ "2066580 0000-0002-4071-0301 118.0\n",
+ "3531030 0000-0002-9638-8091 115.0\n",
+ "... ... ...\n",
+ "10747035 0000-0003-4998-1551 NaN\n",
+ "10747036 0000-0003-4998-4111 NaN\n",
+ "10747037 0000-0003-4998-6045 NaN\n",
+ "10747038 0000-0003-4998-8868 NaN\n",
+ "10747039 0000-0003-4999-7916 NaN\n",
+ "\n",
+ "[10744621 rows x 2 columns]"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.sort_values('n_keywords', ascending=False)[['orcid', 'n_keywords']]"
+ ]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 83,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.plotly.v1+json": {
+ "config": {
+ "linkText": "Export to plot.ly",
+ "plotlyServerURL": "https://plot.ly",
+ "showLink": false
+ },
+ "data": [
+ {
+ "type": "bar",
+ "x": [
+ "0000-0002-0673-0341",
+ "0000-0002-7060-4112",
+ "0000-0002-6075-3501",
+ "0000-0002-4071-0301",
+ "0000-0002-9638-8091",
+ "0000-0002-4235-4259",
+ "0000-0001-9462-5666",
+ "0000-0003-0076-6287",
+ "0000-0002-1878-9762",
+ "0000-0001-6537-7683",
+ "0000-0001-6307-6027",
+ "0000-0003-2273-9888",
+ "0000-0003-1799-0971",
+ "0000-0001-5287-1949",
+ "0000-0002-0937-7061",
+ "0000-0001-9715-9357",
+ "0000-0001-5696-1052",
+ "0000-0003-2998-5520",
+ "0000-0001-5869-2204",
+ "0000-0002-0156-3580",
+ "0000-0002-9625-6742",
+ "0000-0002-8401-8018",
+ "0000-0001-9985-1697",
+ "0000-0003-4246-8579",
+ "0000-0002-7710-0355",
+ "0000-0002-8083-7382",
+ "0000-0001-7654-5013",
+ "0000-0001-6939-3859",
+ "0000-0002-3061-3364",
+ "0000-0003-2509-2549",
+ "0000-0002-0463-0048",
+ "0000-0001-5230-715X",
+ "0000-0001-5458-7167",
+ "0000-0001-9336-6850",
+ "0000-0003-0209-180X",
+ "0000-0002-8227-5387",
+ "0000-0002-9381-2264",
+ "0000-0003-3340-6413",
+ "0000-0003-3584-6834",
+ "0000-0002-2935-1934",
+ "0000-0002-8644-8396",
+ "0000-0002-8659-6321",
+ "0000-0002-3123-3021",
+ "0000-0001-5637-1124",
+ "0000-0001-5167-7466",
+ "0000-0002-8449-2211",
+ "0000-0003-2532-2906",
+ "0000-0002-3532-043X",
+ "0000-0002-2683-4527",
+ "0000-0003-4505-3678",
+ "0000-0002-6347-9464",
+ "0000-0003-4608-3844",
+ "0000-0003-4374-6374",
+ "0000-0003-4511-7942",
+ "0000-0002-1103-9651",
+ "0000-0003-3720-1183",
+ "0000-0001-9280-6017",
+ "0000-0003-4673-1063",
+ "0000-0001-9586-0780",
+ "0000-0002-5539-1761",
+ "0000-0003-2550-1859",
+ "0000-0002-8499-1045",
+ "0000-0003-2218-1343",
+ "0000-0002-5306-7781",
+ "0000-0003-1863-0265",
+ "0000-0002-8072-1152",
+ "0000-0003-3342-6123",
+ "0000-0002-2252-672X",
+ "0000-0002-3907-3552",
+ "0000-0001-6861-9561",
+ "0000-0002-3597-3350",
+ "0000-0002-5274-7742",
+ "0000-0002-3186-8860",
+ "0000-0001-8689-185X",
+ "0000-0002-6282-0640",
+ "0000-0003-3343-5660",
+ "0000-0001-7133-7848",
+ "0000-0003-4486-2684",
+ "0000-0002-9014-2090",
+ "0000-0001-6843-9325",
+ "0000-0003-0097-4182",
+ "0000-0003-1245-7705",
+ "0000-0002-4432-3448",
+ "0000-0001-7857-4133",
+ "0000-0002-1294-2156",
+ "0000-0001-8445-412X",
+ "0000-0002-1411-3028",
+ "0000-0003-3387-3193",
+ "0000-0002-1545-7818",
+ "0000-0002-9125-6022",
+ "0000-0002-3898-9542",
+ "0000-0002-3866-6460",
+ "0000-0003-4283-2895",
+ "0000-0003-4153-6779",
+ "0000-0002-4598-2891",
+ "0000-0002-0211-7195",
+ "0000-0002-1770-9660",
+ "0000-0002-1960-5857",
+ "0000-0003-2640-6757",
+ "0000-0002-5432-9595"
+ ],
+ "y": [
+ 154,
+ 141,
+ 140,
+ 118,
+ 115,
+ 104,
+ 98,
+ 94,
+ 92,
+ 91,
+ 88,
+ 86,
+ 84,
+ 82,
+ 78,
+ 77,
+ 76,
+ 75,
+ 74,
+ 73,
+ 71,
+ 70,
+ 69,
+ 66,
+ 64,
+ 62,
+ 61,
+ 60,
+ 58,
+ 57,
+ 56,
+ 54,
+ 53,
+ 53,
+ 52,
+ 51,
+ 51,
+ 51,
+ 51,
+ 50,
+ 50,
+ 50,
+ 50,
+ 49,
+ 49,
+ 49,
+ 48,
+ 48,
+ 48,
+ 48,
+ 48,
+ 48,
+ 47,
+ 47,
+ 46,
+ 46,
+ 46,
+ 45,
+ 45,
+ 44,
+ 44,
+ 44,
+ 44,
+ 44,
+ 44,
+ 43,
+ 43,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 42,
+ 41,
+ 41,
+ 41,
+ 41,
+ 41,
+ 41,
+ 41,
+ 40,
+ 40,
+ 40,
+ 40,
+ 40,
+ 40,
+ 39,
+ 39,
+ 39,
+ 39,
+ 39,
+ 39,
+ 39,
+ 39,
+ 39,
+ 39,
+ 38,
+ 38,
+ 38,
+ 38
+ ]
+ }
+ ],
+ "layout": {
+ "template": {
+ "data": {
+ "bar": [
+ {
+ "error_x": {
+ "color": "#2a3f5f"
+ },
+ "error_y": {
+ "color": "#2a3f5f"
+ },
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ }
+ },
+ "type": "bar"
+ }
+ ],
+ "barpolar": [
+ {
+ "marker": {
+ "line": {
+ "color": "#E5ECF6",
+ "width": 0.5
+ }
+ },
+ "type": "barpolar"
+ }
+ ],
+ "carpet": [
+ {
+ "aaxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "baxis": {
+ "endlinecolor": "#2a3f5f",
+ "gridcolor": "white",
+ "linecolor": "white",
+ "minorgridcolor": "white",
+ "startlinecolor": "#2a3f5f"
+ },
+ "type": "carpet"
+ }
+ ],
+ "choropleth": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "choropleth"
+ }
+ ],
+ "contour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "contour"
+ }
+ ],
+ "contourcarpet": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "contourcarpet"
+ }
+ ],
+ "heatmap": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmap"
+ }
+ ],
+ "heatmapgl": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "heatmapgl"
+ }
+ ],
+ "histogram": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "histogram"
+ }
+ ],
+ "histogram2d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2d"
+ }
+ ],
+ "histogram2dcontour": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "histogram2dcontour"
+ }
+ ],
+ "mesh3d": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "type": "mesh3d"
+ }
+ ],
+ "parcoords": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "parcoords"
+ }
+ ],
+ "pie": [
+ {
+ "automargin": true,
+ "type": "pie"
+ }
+ ],
+ "scatter": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter"
+ }
+ ],
+ "scatter3d": [
+ {
+ "line": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatter3d"
+ }
+ ],
+ "scattercarpet": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattercarpet"
+ }
+ ],
+ "scattergeo": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergeo"
+ }
+ ],
+ "scattergl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattergl"
+ }
+ ],
+ "scattermapbox": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scattermapbox"
+ }
+ ],
+ "scatterpolar": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolar"
+ }
+ ],
+ "scatterpolargl": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterpolargl"
+ }
+ ],
+ "scatterternary": [
+ {
+ "marker": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "type": "scatterternary"
+ }
+ ],
+ "surface": [
+ {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ },
+ "colorscale": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "type": "surface"
+ }
+ ],
+ "table": [
+ {
+ "cells": {
+ "fill": {
+ "color": "#EBF0F8"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "header": {
+ "fill": {
+ "color": "#C8D4E3"
+ },
+ "line": {
+ "color": "white"
+ }
+ },
+ "type": "table"
+ }
+ ]
+ },
+ "layout": {
+ "annotationdefaults": {
+ "arrowcolor": "#2a3f5f",
+ "arrowhead": 0,
+ "arrowwidth": 1
+ },
+ "autotypenumbers": "strict",
+ "coloraxis": {
+ "colorbar": {
+ "outlinewidth": 0,
+ "ticks": ""
+ }
+ },
+ "colorscale": {
+ "diverging": [
+ [
+ 0,
+ "#8e0152"
+ ],
+ [
+ 0.1,
+ "#c51b7d"
+ ],
+ [
+ 0.2,
+ "#de77ae"
+ ],
+ [
+ 0.3,
+ "#f1b6da"
+ ],
+ [
+ 0.4,
+ "#fde0ef"
+ ],
+ [
+ 0.5,
+ "#f7f7f7"
+ ],
+ [
+ 0.6,
+ "#e6f5d0"
+ ],
+ [
+ 0.7,
+ "#b8e186"
+ ],
+ [
+ 0.8,
+ "#7fbc41"
+ ],
+ [
+ 0.9,
+ "#4d9221"
+ ],
+ [
+ 1,
+ "#276419"
+ ]
+ ],
+ "sequential": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ],
+ "sequentialminus": [
+ [
+ 0,
+ "#0d0887"
+ ],
+ [
+ 0.1111111111111111,
+ "#46039f"
+ ],
+ [
+ 0.2222222222222222,
+ "#7201a8"
+ ],
+ [
+ 0.3333333333333333,
+ "#9c179e"
+ ],
+ [
+ 0.4444444444444444,
+ "#bd3786"
+ ],
+ [
+ 0.5555555555555556,
+ "#d8576b"
+ ],
+ [
+ 0.6666666666666666,
+ "#ed7953"
+ ],
+ [
+ 0.7777777777777778,
+ "#fb9f3a"
+ ],
+ [
+ 0.8888888888888888,
+ "#fdca26"
+ ],
+ [
+ 1,
+ "#f0f921"
+ ]
+ ]
+ },
+ "colorway": [
+ "#636efa",
+ "#EF553B",
+ "#00cc96",
+ "#ab63fa",
+ "#FFA15A",
+ "#19d3f3",
+ "#FF6692",
+ "#B6E880",
+ "#FF97FF",
+ "#FECB52"
+ ],
+ "font": {
+ "color": "#2a3f5f"
+ },
+ "geo": {
+ "bgcolor": "white",
+ "lakecolor": "white",
+ "landcolor": "#E5ECF6",
+ "showlakes": true,
+ "showland": true,
+ "subunitcolor": "white"
+ },
+ "hoverlabel": {
+ "align": "left"
+ },
+ "hovermode": "closest",
+ "mapbox": {
+ "style": "light"
+ },
+ "paper_bgcolor": "white",
+ "plot_bgcolor": "#E5ECF6",
+ "polar": {
+ "angularaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "radialaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "scene": {
+ "xaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "yaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ },
+ "zaxis": {
+ "backgroundcolor": "#E5ECF6",
+ "gridcolor": "white",
+ "gridwidth": 2,
+ "linecolor": "white",
+ "showbackground": true,
+ "ticks": "",
+ "zerolinecolor": "white"
+ }
+ },
+ "shapedefaults": {
+ "line": {
+ "color": "#2a3f5f"
+ }
+ },
+ "ternary": {
+ "aaxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "baxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ },
+ "bgcolor": "#E5ECF6",
+ "caxis": {
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": ""
+ }
+ },
+ "title": {
+ "x": 0.05
+ },
+ "xaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ },
+ "yaxis": {
+ "automargin": true,
+ "gridcolor": "white",
+ "linecolor": "white",
+ "ticks": "",
+ "title": {
+ "standoff": 15
+ },
+ "zerolinecolor": "white",
+ "zerolinewidth": 2
+ }
+ }
+ },
+ "title": {
+ "text": "Keywords provided"
+ },
+ "xaxis": {
+ "tickangle": 45,
+ "tickfont": {
+ "size": 12
+ }
+ }
+ }
+ },
+ "text/html": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "data = [\n",
+ " go.Bar(\n",
+ " x=df.sort_values('n_keywords', ascending=False)['orcid'][:100],\n",
+ " y=df.sort_values('n_keywords', ascending=False)['n_keywords'][:100]\n",
+ " )\n",
+ "]\n",
+ "\n",
+ "layout = go.Layout(\n",
+ " title='Keywords provided',\n",
+ " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
+ ")\n",
+ "fig = go.Figure(data=data, layout=layout)\n",
+ "plotly.offline.iplot(fig)"
+ ]
},
{
"cell_type": "markdown",
@@ -10904,7 +11741,7 @@
},
{
"cell_type": "code",
- "execution_count": 62,
+ "execution_count": 81,
"metadata": {},
"outputs": [
{
@@ -10926,7 +11763,8 @@
"n_works",
"n_emails",
"n_urls",
- "n_ids"
+ "n_ids",
+ "n_keywords"
],
"xaxis": "x",
"y": [
@@ -10936,11 +11774,13 @@
"n_works",
"n_emails",
"n_urls",
- "n_ids"
+ "n_ids",
+ "n_keywords"
],
"yaxis": "y",
"z": [
[
+ null,
null,
null,
null,
@@ -10956,7 +11796,8 @@
0.06481728326324665,
0.011268135706995959,
0.016235518285109687,
- 0.08833871138587861
+ 0.08833871138587861,
+ 0.017045184883982135
],
[
null,
@@ -10965,7 +11806,8 @@
0.0659649633755603,
0.009337798958686118,
0.0168692743777146,
- 0.08953190210488794
+ 0.08953190210488794,
+ 0.017626925761491493
],
[
null,
@@ -10974,7 +11816,8 @@
1,
0.05241643710974057,
0.05179828612278866,
- 0.23927720131732308
+ 0.23927720131732308,
+ 0.030960689715636
],
[
null,
@@ -10983,7 +11826,8 @@
0.05241643710974057,
1,
0.11119920854300894,
- 0.04817263453943147
+ 0.04817263453943147,
+ 0.04157758861961359
],
[
null,
@@ -10992,7 +11836,8 @@
0.05179828612278866,
0.11119920854300894,
1,
- 0.06925708918455128
+ 0.06925708918455128,
+ 0.15926017909633472
],
[
null,
@@ -11001,6 +11846,17 @@
0.23927720131732308,
0.04817263453943147,
0.06925708918455128,
+ 1,
+ 0.06320236481237387
+ ],
+ [
+ null,
+ 0.017045184883982135,
+ 0.017626925761491493,
+ 0.030960689715636,
+ 0.04157758861961359,
+ 0.15926017909633472,
+ 0.06320236481237387,
1
]
]
@@ -11881,9 +12737,9 @@
}
},
"text/html": [
- "