|
|
|
@ -51,7 +51,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -452,7 +452,7 @@
|
|
|
|
|
"4 2021-06-11 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 20,
|
|
|
|
|
"execution_count": 2,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -473,7 +473,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -492,7 +492,7 @@
|
|
|
|
|
" dtype='object')"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 14,
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -503,7 +503,22 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"execution_count": 4,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": [
|
|
|
|
|
"def empty_list_is_nan(cell):\n",
|
|
|
|
|
" if isinstance(cell, list):\n",
|
|
|
|
|
" return np.nan if len(cell) == 0 else cell\n",
|
|
|
|
|
" else:\n",
|
|
|
|
|
" return cell\n",
|
|
|
|
|
" \n",
|
|
|
|
|
"re3data_df = re3data_df.applymap(empty_list_is_nan)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -574,21 +589,21 @@
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2137</td>\n",
|
|
|
|
|
" <td>2686</td>\n",
|
|
|
|
|
" <td>829</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2677</td>\n",
|
|
|
|
|
" <td>1260</td>\n",
|
|
|
|
|
" <td>1248</td>\n",
|
|
|
|
|
" <td>1762</td>\n",
|
|
|
|
|
" <td>146</td>\n",
|
|
|
|
|
" <td>2685</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2700</td>\n",
|
|
|
|
|
" <td>2699</td>\n",
|
|
|
|
|
" <td>2699</td>\n",
|
|
|
|
|
" <td>2706</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
@ -616,21 +631,21 @@
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
|
" <td>2704</td>\n",
|
|
|
|
|
" <td>2129</td>\n",
|
|
|
|
|
" <td>2128</td>\n",
|
|
|
|
|
" <td>2683</td>\n",
|
|
|
|
|
" <td>829</td>\n",
|
|
|
|
|
" <td>828</td>\n",
|
|
|
|
|
" <td>2705</td>\n",
|
|
|
|
|
" <td>9</td>\n",
|
|
|
|
|
" <td>8</td>\n",
|
|
|
|
|
" <td>1233</td>\n",
|
|
|
|
|
" <td>687</td>\n",
|
|
|
|
|
" <td>351</td>\n",
|
|
|
|
|
" <td>79</td>\n",
|
|
|
|
|
" <td>1368</td>\n",
|
|
|
|
|
" <td>1367</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>1324</td>\n",
|
|
|
|
|
" <td>5</td>\n",
|
|
|
|
|
" <td>2475</td>\n",
|
|
|
|
|
" <td>2686</td>\n",
|
|
|
|
|
" <td>1323</td>\n",
|
|
|
|
|
" <td>4</td>\n",
|
|
|
|
|
" <td>2474</td>\n",
|
|
|
|
|
" <td>2685</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
@ -655,12 +670,12 @@
|
|
|
|
|
" </tr>\n",
|
|
|
|
|
" <tr>\n",
|
|
|
|
|
" <th>top</th>\n",
|
|
|
|
|
" <td>re3data_____::d8e2164dd005d3961c23e0762453cfb1</td>\n",
|
|
|
|
|
" <td>r3d100010836</td>\n",
|
|
|
|
|
" <td>UCLA Social Science Data Archive Dataverse</td>\n",
|
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
|
" <td>re3data_____::4cea5a5ea78542232a51190879756661</td>\n",
|
|
|
|
|
" <td>r3d100011254</td>\n",
|
|
|
|
|
" <td>EarthChem Library</td>\n",
|
|
|
|
|
" <td>[IRIS]</td>\n",
|
|
|
|
|
" <td>http://www.jcvi.org/cms/home/</td>\n",
|
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
|
" <td>[doi:10.17171/1-6]</td>\n",
|
|
|
|
|
" <td>The repository is no longer available. >>>!!!<...</td>\n",
|
|
|
|
|
" <td>[disciplinary]</td>\n",
|
|
|
|
|
" <td>2 datasets</td>\n",
|
|
|
|
@ -700,9 +715,9 @@
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>1</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>570</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>1878</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>2</td>\n",
|
|
|
|
|
" <td>1713</td>\n",
|
|
|
|
|
" <td>6</td>\n",
|
|
|
|
@ -745,48 +760,42 @@
|
|
|
|
|
" openaire_id re3data_id \\\n",
|
|
|
|
|
"count 2707 2707 \n",
|
|
|
|
|
"unique 2707 2707 \n",
|
|
|
|
|
"top re3data_____::d8e2164dd005d3961c23e0762453cfb1 r3d100010836 \n",
|
|
|
|
|
"top re3data_____::4cea5a5ea78542232a51190879756661 r3d100011254 \n",
|
|
|
|
|
"freq 1 1 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" repository_name additional_name \\\n",
|
|
|
|
|
"count 2707 2707 \n",
|
|
|
|
|
"unique 2704 2129 \n",
|
|
|
|
|
"top UCLA Social Science Data Archive Dataverse [] \n",
|
|
|
|
|
"freq 2 570 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" repository_url repository_id \\\n",
|
|
|
|
|
"count 2686 2707 \n",
|
|
|
|
|
"unique 2683 829 \n",
|
|
|
|
|
"top http://www.jcvi.org/cms/home/ [] \n",
|
|
|
|
|
"freq 2 1878 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" description type \\\n",
|
|
|
|
|
"count 2707 2707 \n",
|
|
|
|
|
"unique 2705 9 \n",
|
|
|
|
|
"top The repository is no longer available. >>>!!!<... [disciplinary] \n",
|
|
|
|
|
"freq 2 1713 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" size update_date start_date end_date \\\n",
|
|
|
|
|
"count 1260 1248 1762 146 \n",
|
|
|
|
|
"unique 1233 687 351 79 \n",
|
|
|
|
|
"top 2 datasets 2019-05-15 2008 2015 \n",
|
|
|
|
|
"freq 6 15 92 11 \n",
|
|
|
|
|
" repository_name additional_name repository_url \\\n",
|
|
|
|
|
"count 2707 2137 2686 \n",
|
|
|
|
|
"unique 2704 2128 2683 \n",
|
|
|
|
|
"top EarthChem Library [IRIS] http://www.jcvi.org/cms/home/ \n",
|
|
|
|
|
"freq 2 2 2 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" repository_id description \\\n",
|
|
|
|
|
"count 829 2707 \n",
|
|
|
|
|
"unique 828 2705 \n",
|
|
|
|
|
"top [doi:10.17171/1-6] The repository is no longer available. >>>!!!<... \n",
|
|
|
|
|
"freq 2 2 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" type size update_date start_date end_date \\\n",
|
|
|
|
|
"count 2677 1260 1248 1762 146 \n",
|
|
|
|
|
"unique 8 1233 687 351 79 \n",
|
|
|
|
|
"top [disciplinary] 2 datasets 2019-05-15 2008 2015 \n",
|
|
|
|
|
"freq 1713 6 15 92 11 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" subject mission_statement \\\n",
|
|
|
|
|
"count 2707 2707 \n",
|
|
|
|
|
"unique 1368 2 \n",
|
|
|
|
|
"count 2685 2707 \n",
|
|
|
|
|
"unique 1367 2 \n",
|
|
|
|
|
"top [1 Humanities and Social Sciences, 2 Life Scie... true \n",
|
|
|
|
|
"freq 222 2286 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" content_type provider_type keyword \\\n",
|
|
|
|
|
"count 2707 2707 2707 \n",
|
|
|
|
|
"unique 1324 5 2475 \n",
|
|
|
|
|
"count 2700 2699 2699 \n",
|
|
|
|
|
"unique 1323 4 2474 \n",
|
|
|
|
|
"top [Standard office documents] [dataProvider] [multidisciplinary] \n",
|
|
|
|
|
"freq 30 1748 190 \n",
|
|
|
|
|
"\n",
|
|
|
|
|
" institution policy \\\n",
|
|
|
|
|
"count 2707 2707 \n",
|
|
|
|
|
"unique 2686 2 \n",
|
|
|
|
|
"count 2706 2707 \n",
|
|
|
|
|
"unique 2685 2 \n",
|
|
|
|
|
"top [[National Center for Biotechnology Informatio... true \n",
|
|
|
|
|
"freq 6 2394 \n",
|
|
|
|
|
"\n",
|
|
|
|
@ -827,7 +836,7 @@
|
|
|
|
|
"freq 47 "
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 3,
|
|
|
|
|
"execution_count": 5,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -838,7 +847,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -847,21 +856,21 @@
|
|
|
|
|
"openaire_id 0\n",
|
|
|
|
|
"re3data_id 0\n",
|
|
|
|
|
"repository_name 0\n",
|
|
|
|
|
"additional_name 0\n",
|
|
|
|
|
"additional_name 570\n",
|
|
|
|
|
"repository_url 21\n",
|
|
|
|
|
"repository_id 0\n",
|
|
|
|
|
"repository_id 1878\n",
|
|
|
|
|
"description 0\n",
|
|
|
|
|
"type 0\n",
|
|
|
|
|
"type 30\n",
|
|
|
|
|
"size 1447\n",
|
|
|
|
|
"update_date 1459\n",
|
|
|
|
|
"start_date 945\n",
|
|
|
|
|
"end_date 2561\n",
|
|
|
|
|
"subject 0\n",
|
|
|
|
|
"subject 22\n",
|
|
|
|
|
"mission_statement 0\n",
|
|
|
|
|
"content_type 0\n",
|
|
|
|
|
"provider_type 0\n",
|
|
|
|
|
"keyword 0\n",
|
|
|
|
|
"institution 0\n",
|
|
|
|
|
"content_type 7\n",
|
|
|
|
|
"provider_type 8\n",
|
|
|
|
|
"keyword 8\n",
|
|
|
|
|
"institution 1\n",
|
|
|
|
|
"policy 0\n",
|
|
|
|
|
"database_access 0\n",
|
|
|
|
|
"database_license 0\n",
|
|
|
|
@ -886,7 +895,7 @@
|
|
|
|
|
"dtype: int64"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 10,
|
|
|
|
|
"execution_count": 6,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -897,7 +906,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -911,7 +920,7 @@
|
|
|
|
|
" 'Configuration data', 'Networkbased data', nan], dtype=object)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 18,
|
|
|
|
|
"execution_count": 7,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -922,7 +931,7 @@
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [
|
|
|
|
|
{
|
|
|
|
@ -931,7 +940,7 @@
|
|
|
|
|
"array(['dataProvider', 'serviceProvider', nan], dtype=object)"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
"execution_count": 19,
|
|
|
|
|
"execution_count": 8,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"output_type": "execute_result"
|
|
|
|
|
}
|
|
|
|
@ -939,13 +948,6 @@
|
|
|
|
|
"source": [
|
|
|
|
|
"re3data_df.provider_type.explode().unique()"
|
|
|
|
|
]
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
"cell_type": "code",
|
|
|
|
|
"execution_count": null,
|
|
|
|
|
"metadata": {},
|
|
|
|
|
"outputs": [],
|
|
|
|
|
"source": []
|
|
|
|
|
}
|
|
|
|
|
],
|
|
|
|
|
"metadata": {
|
|
|
|
|