rerunning notebooks

This commit is contained in:
Andrea Mannocci 2022-02-14 13:34:42 +01:00
parent 6c71bde5f8
commit e537f30a32
6 changed files with 2488 additions and 2428 deletions

View File

@ -237,7 +237,7 @@
" <td>eng</td>\n", " <td>eng</td>\n",
" <td>[{'additionalName': 'University of North Carol...</td>\n", " <td>[{'additionalName': 'University of North Carol...</td>\n",
" <td>https://dataverse.unc.edu/</td>\n", " <td>https://dataverse.unc.edu/</td>\n",
" <td>[]</td>\n", " <td>[FAIRsharing_doi:10.25504/FAIRsharing.pS2p8c]</td>\n",
" <td>[\"https://dataverse.unc.edu/\", \"odumarchive@un...</td>\n", " <td>[\"https://dataverse.unc.edu/\", \"odumarchive@un...</td>\n",
" <td>UNC Dataverse is an open-source repository sof...</td>\n", " <td>UNC Dataverse is an open-source repository sof...</td>\n",
" <td>eng</td>\n", " <td>eng</td>\n",
@ -272,7 +272,7 @@
" <td>{}</td>\n", " <td>{}</td>\n",
" <td>UNC Dataverse is covered by Clarivate Data Cit...</td>\n", " <td>UNC Dataverse is covered by Clarivate Data Cit...</td>\n",
" <td>2012-07-23</td>\n", " <td>2012-07-23</td>\n",
" <td>2021-08-11</td>\n", " <td>2021-10-25</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
@ -348,7 +348,7 @@
"0 [] \n", "0 [] \n",
"1 [RRID:SCR_010479, RRID:nlx_157752] \n", "1 [RRID:SCR_010479, RRID:nlx_157752] \n",
"2 [] \n", "2 [] \n",
"3 [] \n", "3 [FAIRsharing_doi:10.25504/FAIRsharing.pS2p8c] \n",
"4 [FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] \n", "4 [FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] \n",
"\n", "\n",
" repositoryContact \\\n", " repositoryContact \\\n",
@ -509,7 +509,7 @@
"0 Odum Dataverse is covered by Thomson Reuters D... 2013-06-10 2021-07-06 \n", "0 Odum Dataverse is covered by Thomson Reuters D... 2013-06-10 2021-07-06 \n",
"1 NaN 2012-07-04 2021-05-25 \n", "1 NaN 2012-07-04 2021-05-25 \n",
"2 NaN 2012-07-20 2020-08-27 \n", "2 NaN 2012-07-20 2020-08-27 \n",
"3 UNC Dataverse is covered by Clarivate Data Cit... 2012-07-23 2021-08-11 \n", "3 UNC Dataverse is covered by Clarivate Data Cit... 2012-07-23 2021-10-25 \n",
"4 ADS is covered by Clarivate Data Citation Inde... 2012-07-23 2021-09-02 " "4 ADS is covered by Clarivate Data Citation Inde... 2012-07-23 2021-09-02 "
] ]
}, },
@ -651,100 +651,100 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>count</th>\n", " <th>count</th>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2170</td>\n", " <td>2206</td>\n",
" <td>2716</td>\n", " <td>2769</td>\n",
" <td>863</td>\n", " <td>1024</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2710</td>\n", " <td>2777</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>1776</td>\n", " <td>1800</td>\n",
" <td>157</td>\n", " <td>172</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2720</td>\n", " <td>2789</td>\n",
" <td>2318</td>\n", " <td>2373</td>\n",
" <td>2732</td>\n", " <td>2787</td>\n",
" <td>2735</td>\n", " <td>2788</td>\n",
" <td>2732</td>\n", " <td>2785</td>\n",
" <td>2738</td>\n", " <td>2792</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2711</td>\n", " <td>2778</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>1316</td>\n", " <td>1339</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>1512</td>\n", " <td>1532</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2737</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>1674</td>\n", " <td>1694</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>unique</th>\n", " <th>unique</th>\n",
" <td>2739</td>\n", " <td>2793</td>\n",
" <td>2736</td>\n", " <td>2791</td>\n",
" <td>19</td>\n", " <td>19</td>\n",
" <td>2161</td>\n", " <td>2196</td>\n",
" <td>2713</td>\n", " <td>2766</td>\n",
" <td>863</td>\n", " <td>1023</td>\n",
" <td>2459</td>\n", " <td>2532</td>\n",
" <td>2737</td>\n", " <td>2792</td>\n",
" <td>6</td>\n", " <td>6</td>\n",
" <td>8</td>\n", " <td>8</td>\n",
" <td>1289</td>\n", " <td>1321</td>\n",
" <td>352</td>\n", " <td>362</td>\n",
" <td>80</td>\n", " <td>86</td>\n",
" <td>107</td>\n", " <td>110</td>\n",
" <td>1388</td>\n", " <td>1417</td>\n",
" <td>2249</td>\n", " <td>2304</td>\n",
" <td>1337</td>\n", " <td>1350</td>\n",
" <td>4</td>\n", " <td>5</td>\n",
" <td>2503</td>\n", " <td>2543</td>\n",
" <td>2719</td>\n", " <td>2772</td>\n",
" <td>2319</td>\n", " <td>2366</td>\n",
" <td>12</td>\n", " <td>12</td>\n",
" <td>375</td>\n", " <td>377</td>\n",
" <td>145</td>\n", " <td>146</td>\n",
" <td>2263</td>\n", " <td>2294</td>\n",
" <td>3</td>\n", " <td>3</td>\n",
" <td>681</td>\n", " <td>695</td>\n",
" <td>23</td>\n", " <td>23</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" <td>1146</td>\n", " <td>1170</td>\n",
" <td>29</td>\n", " <td>29</td>\n",
" <td>1321</td>\n", " <td>1337</td>\n",
" <td>12</td>\n", " <td>13</td>\n",
" <td>3</td>\n", " <td>3</td>\n",
" <td>3</td>\n", " <td>3</td>\n",
" <td>14</td>\n", " <td>16</td>\n",
" <td>172</td>\n", " <td>175</td>\n",
" <td>563</td>\n", " <td>544</td>\n",
" <td>1656</td>\n", " <td>1673</td>\n",
" <td>1275</td>\n", " <td>1316</td>\n",
" <td>740</td>\n", " <td>722</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>top</th>\n", " <th>top</th>\n",
" <td>r3d100000001</td>\n", " <td>r3d100000001</td>\n",
" <td>Språkbanken</td>\n", " <td>EarthChem Library</td>\n",
" <td>eng</td>\n", " <td>eng</td>\n",
" <td>[{'additionalName': 'MPC', 'additionalNameLang...</td>\n", " <td>[{'additionalName': 'FRED', 'additionalNameLan...</td>\n",
" <td>http://icgem.gfz-potsdam.de/home</td>\n", " <td>http://icgem.gfz-potsdam.de/home</td>\n",
" <td>[RRID:SCR_010479, RRID:nlx_157752]</td>\n", " <td>[biodbcore-001574]</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>The National Archives and Records Administrati...</td>\n", " <td>The National Archives and Records Administrati...</td>\n",
" <td>eng</td>\n", " <td>eng</td>\n",
@ -778,165 +778,165 @@
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>{}</td>\n", " <td>{}</td>\n",
" <td>is covered by Elsevier.</td>\n", " <td>is covered by Elsevier.</td>\n",
" <td>2016-05-10</td>\n", " <td>2018-08-10</td>\n",
" <td>2021-09-03</td>\n", " <td>2021-09-03</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>freq</th>\n", " <th>freq</th>\n",
" <td>1</td>\n", " <td>1</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" <td>2554</td>\n", " <td>2596</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" <td>1</td>\n",
" <td>202</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" <td>2723</td>\n", " <td>170</td>\n",
" <td>1733</td>\n", " <td>2</td>\n",
" <td>1450</td>\n", " <td>2776</td>\n",
" <td>92</td>\n", " <td>1768</td>\n",
" <td>11</td>\n", " <td>1472</td>\n",
" <td>2063</td>\n", " <td>93</td>\n",
" <td>226</td>\n", " <td>12</td>\n",
" <td>14</td>\n", " <td>2088</td>\n",
" <td>30</td>\n", " <td>240</td>\n",
" <td>1771</td>\n",
" <td>193</td>\n",
" <td>6</td>\n",
" <td>312</td>\n",
" <td>2571</td>\n",
" <td>2159</td>\n",
" <td>1269</td>\n",
" <td>64</td>\n",
" <td>1793</td>\n",
" <td>2013</td>\n",
" <td>1226</td>\n",
" <td>1108</td>\n",
" <td>1498</td>\n",
" <td>1361</td>\n",
" <td>72</td>\n",
" <td>2155</td>\n",
" <td>1608</td>\n",
" <td>1515</td>\n",
" <td>2509</td>\n",
" <td>1669</td>\n",
" <td>2162</td>\n",
" <td>14</td>\n", " <td>14</td>\n",
" <td>29</td>\n",
" <td>1806</td>\n",
" <td>205</td>\n",
" <td>7</td>\n",
" <td>319</td>\n",
" <td>2624</td>\n",
" <td>2201</td>\n",
" <td>1292</td>\n",
" <td>71</td>\n",
" <td>1851</td>\n",
" <td>2054</td>\n",
" <td>1216</td>\n",
" <td>1131</td>\n",
" <td>1526</td>\n",
" <td>1359</td>\n",
" <td>76</td>\n",
" <td>2199</td>\n",
" <td>1643</td>\n",
" <td>1569</td>\n",
" <td>2557</td>\n",
" <td>1693</td>\n",
" <td>2235</td>\n",
" <td>17</td>\n",
" <td>20</td>\n", " <td>20</td>\n",
" <td>137</td>\n", " <td>104</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" orgIdentifier repositoryName repositoryName.language \\\n", " orgIdentifier repositoryName repositoryName.language \\\n",
"count 2739 2739 2739 \n", "count 2793 2793 2793 \n",
"unique 2739 2736 19 \n", "unique 2793 2791 19 \n",
"top r3d100000001 Språkbanken eng \n", "top r3d100000001 EarthChem Library eng \n",
"freq 1 2 2554 \n", "freq 1 2 2596 \n",
"\n", "\n",
" additionalName \\\n", " additionalName \\\n",
"count 2170 \n", "count 2206 \n",
"unique 2161 \n", "unique 2196 \n",
"top [{'additionalName': 'MPC', 'additionalNameLang... \n", "top [{'additionalName': 'FRED', 'additionalNameLan... \n",
"freq 2 \n", "freq 2 \n",
"\n", "\n",
" repositoryURL repositoryIdentifier \\\n", " repositoryURL repositoryIdentifier \\\n",
"count 2716 863 \n", "count 2769 1024 \n",
"unique 2713 863 \n", "unique 2766 1023 \n",
"top http://icgem.gfz-potsdam.de/home [RRID:SCR_010479, RRID:nlx_157752] \n", "top http://icgem.gfz-potsdam.de/home [biodbcore-001574] \n",
"freq 2 1 \n", "freq 2 2 \n",
"\n", "\n",
" repositoryContact description \\\n", " repositoryContact description \\\n",
"count 2739 2739 \n", "count 2793 2793 \n",
"unique 2459 2737 \n", "unique 2532 2792 \n",
"top [] The National Archives and Records Administrati... \n", "top [] The National Archives and Records Administrati... \n",
"freq 202 2 \n", "freq 170 2 \n",
"\n", "\n",
" description.language type size \\\n", " description.language type size \\\n",
"count 2739 2710 2739 \n", "count 2793 2777 2793 \n",
"unique 6 8 1289 \n", "unique 6 8 1321 \n",
"top eng [disciplinary] {\"size\": \"\", \"updatedp\": \"\"} \n", "top eng [disciplinary] {\"size\": \"\", \"updatedp\": \"\"} \n",
"freq 2723 1733 1450 \n", "freq 2776 1768 1472 \n",
"\n", "\n",
" startDate endDate repositoryLanguage \\\n", " startDate endDate repositoryLanguage \\\n",
"count 1776 157 2739 \n", "count 1800 172 2793 \n",
"unique 352 80 107 \n", "unique 362 86 110 \n",
"top 2008 2015 [\"eng\"] \n", "top 2008 2015 [\"eng\"] \n",
"freq 92 11 2063 \n", "freq 93 12 2088 \n",
"\n", "\n",
" subject \\\n", " subject \\\n",
"count 2720 \n", "count 2789 \n",
"unique 1388 \n", "unique 1417 \n",
"top [{'name': '1 Humanities and Social Sciences', ... \n", "top [{'name': '1 Humanities and Social Sciences', ... \n",
"freq 226 \n", "freq 240 \n",
"\n", "\n",
" missionStatementURL \\\n", " missionStatementURL \\\n",
"count 2318 \n", "count 2373 \n",
"unique 2249 \n", "unique 2304 \n",
"top https://learn.scholarsportal.info/all-guides/d... \n", "top https://learn.scholarsportal.info/all-guides/d... \n",
"freq 14 \n", "freq 14 \n",
"\n", "\n",
" contentType providerType \\\n", " contentType providerType \\\n",
"count 2732 2735 \n", "count 2787 2788 \n",
"unique 1337 4 \n", "unique 1350 5 \n",
"top [{'name': 'Standard office documents', 'scheme... [dataProvider] \n", "top [{'name': 'Standard office documents', 'scheme... [dataProvider] \n",
"freq 30 1771 \n", "freq 29 1806 \n",
"\n", "\n",
" keyword \\\n", " keyword \\\n",
"count 2732 \n", "count 2785 \n",
"unique 2503 \n", "unique 2543 \n",
"top [multidisciplinary] \n", "top [multidisciplinary] \n",
"freq 193 \n", "freq 205 \n",
"\n", "\n",
" institution policy \\\n", " institution policy \\\n",
"count 2738 2739 \n", "count 2792 2793 \n",
"unique 2719 2319 \n", "unique 2772 2366 \n",
"top [{'institutionName': 'National Center for Biot... [][] \n", "top [{'institutionName': 'National Center for Biot... [][] \n",
"freq 6 312 \n", "freq 7 319 \n",
"\n", "\n",
" databaseAccess databaseLicense \\\n", " databaseAccess databaseLicense \\\n",
"count 2739 2739 \n", "count 2793 2793 \n",
"unique 12 375 \n", "unique 12 377 \n",
"top {\"databaseAccessType\": \"open\", \"databaseAcces... [] \n", "top {\"databaseAccessType\": \"open\", \"databaseAcces... [] \n",
"freq 2571 2159 \n", "freq 2624 2201 \n",
"\n", "\n",
" dataAccess \\\n", " dataAccess \\\n",
"count 2739 \n", "count 2793 \n",
"unique 145 \n", "unique 146 \n",
"top [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n", "top [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"freq 1269 \n", "freq 1292 \n",
"\n", "\n",
" dataLicense dataUploadType \\\n", " dataLicense dataUploadType \\\n",
"count 2739 2711 \n", "count 2793 2778 \n",
"unique 2263 3 \n", "unique 2294 3 \n",
"top [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n", "top [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"freq 64 1793 \n", "freq 71 1851 \n",
"\n", "\n",
" dataUploadLicense software versioning api pidSystem \\\n", " dataUploadLicense software versioning api pidSystem \\\n",
"count 2739 2739 1316 2739 2739 \n", "count 2793 2793 1339 2793 2793 \n",
"unique 681 23 2 1146 29 \n", "unique 695 23 2 1170 29 \n",
"top [] [\"unknown\"] yes {} [\"none\"] \n", "top [] [\"unknown\"] yes {} [\"none\"] \n",
"freq 2013 1226 1108 1498 1361 \n", "freq 2054 1216 1131 1526 1359 \n",
"\n", "\n",
" citationGuidelineURL aidSystem \\\n", " citationGuidelineURL aidSystem \\\n",
"count 1512 2739 \n", "count 1532 2793 \n",
"unique 1321 12 \n", "unique 1337 13 \n",
"top https://dataverse.org/best-practices/data-cita... [] \n", "top https://dataverse.org/best-practices/data-cita... [] \n",
"freq 72 2155 \n", "freq 76 2199 \n",
"\n", "\n",
" enhancedPublication qualityManagement certificate metadataStandard \\\n", " enhancedPublication qualityManagement certificate metadataStandard \\\n",
"count 2737 2739 2739 2739 \n", "count 2793 2793 2793 2793 \n",
"unique 3 3 14 172 \n", "unique 3 3 16 175 \n",
"top unknown yes [] [] \n", "top unknown yes [] [] \n",
"freq 1608 1515 2509 1669 \n", "freq 1643 1569 2557 1693 \n",
"\n", "\n",
" syndication remarks entryDate lastUpdate \n", " syndication remarks entryDate lastUpdate \n",
"count 2739 1674 2739 2739 \n", "count 2793 1694 2793 2793 \n",
"unique 563 1656 1275 740 \n", "unique 544 1673 1316 722 \n",
"top {} is covered by Elsevier. 2016-05-10 2021-09-03 \n", "top {} is covered by Elsevier. 2018-08-10 2021-09-03 \n",
"freq 2162 14 20 137 " "freq 2235 17 20 104 "
] ]
}, },
"execution_count": 5, "execution_count": 5,
@ -959,42 +959,42 @@
"orgIdentifier 0\n", "orgIdentifier 0\n",
"repositoryName 0\n", "repositoryName 0\n",
"repositoryName.language 0\n", "repositoryName.language 0\n",
"additionalName 569\n", "additionalName 587\n",
"repositoryURL 23\n", "repositoryURL 24\n",
"repositoryIdentifier 1876\n", "repositoryIdentifier 1769\n",
"repositoryContact 0\n", "repositoryContact 0\n",
"description 0\n", "description 0\n",
"description.language 0\n", "description.language 0\n",
"type 29\n", "type 16\n",
"size 0\n", "size 0\n",
"startDate 963\n", "startDate 993\n",
"endDate 2582\n", "endDate 2621\n",
"repositoryLanguage 0\n", "repositoryLanguage 0\n",
"subject 19\n", "subject 4\n",
"missionStatementURL 421\n", "missionStatementURL 420\n",
"contentType 7\n", "contentType 6\n",
"providerType 4\n", "providerType 5\n",
"keyword 7\n", "keyword 8\n",
"institution 1\n", "institution 1\n",
"policy 0\n", "policy 0\n",
"databaseAccess 0\n", "databaseAccess 0\n",
"databaseLicense 0\n", "databaseLicense 0\n",
"dataAccess 0\n", "dataAccess 0\n",
"dataLicense 0\n", "dataLicense 0\n",
"dataUploadType 28\n", "dataUploadType 15\n",
"dataUploadLicense 0\n", "dataUploadLicense 0\n",
"software 0\n", "software 0\n",
"versioning 1423\n", "versioning 1454\n",
"api 0\n", "api 0\n",
"pidSystem 0\n", "pidSystem 0\n",
"citationGuidelineURL 1227\n", "citationGuidelineURL 1261\n",
"aidSystem 0\n", "aidSystem 0\n",
"enhancedPublication 2\n", "enhancedPublication 0\n",
"qualityManagement 0\n", "qualityManagement 0\n",
"certificate 0\n", "certificate 0\n",
"metadataStandard 0\n", "metadataStandard 0\n",
"syndication 0\n", "syndication 0\n",
"remarks 1065\n", "remarks 1099\n",
"entryDate 0\n", "entryDate 0\n",
"lastUpdate 0\n", "lastUpdate 0\n",
"dtype: int64" "dtype: int64"
@ -1018,21 +1018,21 @@
"data": { "data": {
"text/plain": [ "text/plain": [
"contentType\n", "contentType\n",
"Archived data 658\n", "Archived data 677\n",
"Audiovisual data 542\n", "Audiovisual data 561\n",
"Configuration data 79\n", "Configuration data 84\n",
"Databases 586\n", "Databases 593\n",
"Images 1378\n", "Images 1407\n",
"Networkbased data 153\n", "Networkbased data 155\n",
"Plain text 1158\n", "Plain text 1173\n",
"Raw data 1197\n", "Raw data 1224\n",
"Scientific and statistical data formats 1685\n", "Scientific and statistical data formats 1725\n",
"Software applications 456\n", "Software applications 462\n",
"Source code 209\n", "Source code 217\n",
"Standard office documents 1684\n", "Standard office documents 1719\n",
"Structured graphics 917\n", "Structured graphics 937\n",
"Structured text 848\n", "Structured text 878\n",
"other 962\n", "other 979\n",
"dtype: int64" "dtype: int64"
] ]
}, },
@ -1055,8 +1055,8 @@
"data": { "data": {
"text/plain": [ "text/plain": [
"providerType\n", "providerType\n",
"dataProvider 2491\n", "dataProvider 2539\n",
"serviceProvider 963\n", "serviceProvider 982\n",
"dtype: int64" "dtype: int64"
] ]
}, },
@ -1079,7 +1079,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

View File

@ -78,113 +78,113 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>0</th>\n", " <th>0</th>\n",
" <td>175</td>\n", " <td>134</td>\n",
" <td>{\"name\": \"hku theses online\", \"language\": \"en\"}</td>\n", " <td>{\"name\": \"eldorado - repository of the tu dort...</td>\n",
" <td>[]</td>\n", " <td>[{'name': 'eldorado - ressourcen aus und für l...</td>\n",
" <td>http://hub.hku.hk/handle/10722/1057</td>\n", " <td>https://eldorado.tu-dortmund.de</td>\n",
" <td>this is an institutional repository providing ...</td>\n", " <td>NaN</td>\n",
" <td>institutional</td>\n", " <td>institutional</td>\n",
" <td>[\"zh\", \"en\"]</td>\n",
" <td>2021-03-25 10:16:18</td>\n",
" <td>2005-12-21 12:44:08</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[bibliographic_references, theses_and_disserta...</td>\n",
" <td>[{'name': 'university of hong kong', 'alternat...</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap...</td>\n", " <td>2022-01-12 15:34:54</td>\n",
" <td>NaN</td>\n", " <td>2005-12-19 14:57:52</td>\n",
" <td>[arts, humanities, science, mathematics, socia...</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'technische universität dortmund', '...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n",
" <td>https://eldorado.tu-dortmund.de/oai/request</td>\n",
" <td>yes</td>\n", " <td>yes</td>\n",
" <td>fully_functional</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>11850.0</td>\n", " <td>9629.0</td>\n",
" <td>20963.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>1</th>\n", " <th>1</th>\n",
" <td>64</td>\n", " <td>58</td>\n",
" <td>{\"name\": \"research support scheme - central eu...</td>\n", " <td>{\"name\": \"archive ouverte en sciences de linfo...</td>\n",
" <td>[]</td>\n", " <td>[{'acronym': '@rchivesic'}]</td>\n",
" <td>http://rss.archives.ceu.hu/</td>\n", " <td>https://archivesic.ccsd.cnrs.fr</td>\n",
" <td>this is an institutional repository collecting...</td>\n",
" <td>institutional</td>\n",
" <td>[\"cs\", \"en\", \"hu\", \"ru\"]</td>\n",
" <td>2021-03-25 09:48:31</td>\n",
" <td>2006-01-04 14:59:30</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[unpub_reports_and_working_papers]</td>\n",
" <td>[{'name': 'central european university', 'alte...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"eprints\", \"version\": \"2.2.1\"}</td>\n",
" <td>http://rss.archives.ceu.hu/perl/oai2</td>\n",
" <td>yes</td>\n",
" <td>fully_functional</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>164.0</td>\n", " <td>institutional</td>\n",
" <td>[]</td>\n",
" <td>2022-01-12 15:34:53</td>\n",
" <td>2006-01-13 12:48:32</td>\n",
" <td>[arts, science, technology, engineering, mathe...</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'centre pour la communication scient...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"hal\", \"version\": \"\"}</td>\n",
" <td>https://api.archives-ouvertes.fr/oai/archivesic</td>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
" <td>55492.0</td>\n",
" <td>1137498.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>2</th>\n", " <th>2</th>\n",
" <td>151</td>\n", " <td>93</td>\n",
" <td>{\"name\": \"cadmus, eui research repository\", \"l...</td>\n", " <td>{\"name\": \"digitalcommons@the texas medical cen...</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>http://cadmus.eui.eu/</td>\n", " <td>http://digitalcommons.library.tmc.edu/</td>\n",
" <td>cadmus is the name of the eui research reposit...</td>\n", " <td>NaN</td>\n",
" <td>institutional</td>\n", " <td>institutional</td>\n",
" <td>[\"nl\", \"en\", \"fr\", \"de\", \"it\"]</td>\n", " <td>[]</td>\n",
" <td>2021-09-13 13:35:36</td>\n", " <td>2022-01-12 15:34:53</td>\n",
" <td>2006-01-04 12:07:07</td>\n", " <td>2006-02-14 11:16:12</td>\n",
" <td>[\"history and archaeology\", \"multidisciplinary...</td>\n", " <td>[health and medicine]</td>\n",
" <td>[journal_articles, theses_and_dissertations, u...</td>\n", " <td>[journal_articles, theses_and_dissertations]</td>\n",
" <td>[{'name': 'european university institute', 'al...</td>\n", " <td>[{'name': 'texas medical center', 'alternative...</td>\n",
" <td>[{\"policy_url\": \"https://www.eui.eu/research/e...</td>\n", " <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"5.2\"}</td>\n", " <td>{\"name\": \"other\", \"version\": \"\"}</td>\n",
" <td>http://cadmus.eui.eu/oai/request</td>\n", " <td>http://digitalcommons.library.tmc.edu/do/oai/</td>\n",
" <td>yes</td>\n", " <td>yes</td>\n",
" <td>fully_functional</td>\n", " <td>NaN</td>\n",
" <td>3867.0</td>\n", " <td>2658.0</td>\n",
" <td>24869.0</td>\n", " <td>7268.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>3</th>\n", " <th>3</th>\n",
" <td>105</td>\n", " <td>68</td>\n",
" <td>{\"name\": \"document server@uhasselt\", \"language...</td>\n", " <td>{\"name\": \"cognitive sciences eprint archive\", ...</td>\n",
" <td>[{'acronym': 'cogprints'}]</td>\n",
" <td>http://cogprints.org/</td>\n",
" <td>NaN</td>\n",
" <td>disciplinary</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>https://doclib.uhasselt.be/dspace/</td>\n", " <td>2022-01-12 15:34:53</td>\n",
" <td>this site is a university repository providing...</td>\n", " <td>2006-01-04 15:01:23</td>\n",
" <td>institutional</td>\n", " <td>[humanities, health and medicine, science, soc...</td>\n",
" <td>[\"nl\", \"en\", \"fr\", \"de\"]</td>\n",
" <td>2021-04-16 15:23:52</td>\n",
" <td>2006-01-24 15:46:44</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n", " <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'uhasselt', 'alternativeName': 'hass...</td>\n", " <td>[{'name': 'university of southampton', 'altern...</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"1.7.2\"}</td>\n", " <td>{\"name\": \"eprints\", \"version\": \"\"}</td>\n",
" <td>http://doclib.uhasselt.be/dspace-oai/request</td>\n", " <td>http://cogprints.org/cgi/oai2</td>\n",
" <td>yes</td>\n", " <td>yes</td>\n",
" <td>fully_functional</td>\n", " <td>NaN</td>\n",
" <td>0.0</td>\n", " <td>2895.0</td>\n",
" <td>27376.0</td>\n", " <td>4277.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>4</th>\n", " <th>4</th>\n",
" <td>101</td>\n", " <td>84</td>\n",
" <td>{\"name\": \"utrecht university repository\", \"lan...</td>\n", " <td>{\"name\": \"digital commons@carleton college\", \"...</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>http://dspace.library.uu.nl</td>\n", " <td>http://digitalcommons.carleton.edu/</td>\n",
" <td>this site is a university repository providing...</td>\n", " <td>NaN</td>\n",
" <td>institutional</td>\n", " <td>institutional</td>\n",
" <td>[\"nl\", \"en\"]</td>\n",
" <td>2021-04-16 15:22:03</td>\n",
" <td>2006-01-13 12:55:13</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'university of utrecht', 'alternativ...</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n", " <td>2022-01-12 15:34:53</td>\n",
" <td>https://dspace.library.uu.nl/oai/request</td>\n", " <td>2006-01-04 16:07:58</td>\n",
" <td>[humanities, science, social sciences]</td>\n",
" <td>[journal_articles, unpub_reports_and_working_p...</td>\n",
" <td>[{'name': 'carleton college', 'alternativeName...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"other\", \"version\": \"\"}</td>\n",
" <td>NaN</td>\n",
" <td>yes</td>\n", " <td>yes</td>\n",
" <td>fully_functional</td>\n", " <td>NaN</td>\n",
" <td>1686.0</td>\n", " <td>NaN</td>\n",
" <td>185637.0</td>\n", " <td>42.0</td>\n",
" </tr>\n", " </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
@ -192,102 +192,95 @@
], ],
"text/plain": [ "text/plain": [
" system_metadata.id repository_metadata.name \\\n", " system_metadata.id repository_metadata.name \\\n",
"0 175 {\"name\": \"hku theses online\", \"language\": \"en\"} \n", "0 134 {\"name\": \"eldorado - repository of the tu dort... \n",
"1 64 {\"name\": \"research support scheme - central eu... \n", "1 58 {\"name\": \"archive ouverte en sciences de linfo... \n",
"2 151 {\"name\": \"cadmus, eui research repository\", \"l... \n", "2 93 {\"name\": \"digitalcommons@the texas medical cen... \n",
"3 105 {\"name\": \"document server@uhasselt\", \"language... \n", "3 68 {\"name\": \"cognitive sciences eprint archive\", ... \n",
"4 101 {\"name\": \"utrecht university repository\", \"lan... \n", "4 84 {\"name\": \"digital commons@carleton college\", \"... \n",
"\n", "\n",
" repository_metadata.alternativename repository_metadata.url \\\n", " repository_metadata.alternativename \\\n",
"0 [] http://hub.hku.hk/handle/10722/1057 \n", "0 [{'name': 'eldorado - ressourcen aus und für l... \n",
"1 [] http://rss.archives.ceu.hu/ \n", "1 [{'acronym': '@rchivesic'}] \n",
"2 [] http://cadmus.eui.eu/ \n", "2 [] \n",
"3 [] https://doclib.uhasselt.be/dspace/ \n", "3 [{'acronym': 'cogprints'}] \n",
"4 [] http://dspace.library.uu.nl \n",
"\n",
" repository_metadata.description repository_metadata.type \\\n",
"0 this is an institutional repository providing ... institutional \n",
"1 this is an institutional repository collecting... institutional \n",
"2 cadmus is the name of the eui research reposit... institutional \n",
"3 this site is a university repository providing... institutional \n",
"4 this site is a university repository providing... institutional \n",
"\n",
" repository_metadata.content_languages system_metadata.date_modified \\\n",
"0 [\"zh\", \"en\"] 2021-03-25 10:16:18 \n",
"1 [\"cs\", \"en\", \"hu\", \"ru\"] 2021-03-25 09:48:31 \n",
"2 [\"nl\", \"en\", \"fr\", \"de\", \"it\"] 2021-09-13 13:35:36 \n",
"3 [\"nl\", \"en\", \"fr\", \"de\"] 2021-04-16 15:23:52 \n",
"4 [\"nl\", \"en\"] 2021-04-16 15:22:03 \n",
"\n",
" system_metadata.date_created \\\n",
"0 2005-12-21 12:44:08 \n",
"1 2006-01-04 14:59:30 \n",
"2 2006-01-04 12:07:07 \n",
"3 2006-01-24 15:46:44 \n",
"4 2006-01-13 12:55:13 \n",
"\n",
" repository_metadata.content_subjects \\\n",
"0 [\"multidisciplinary\"] \n",
"1 [\"multidisciplinary\"] \n",
"2 [\"history and archaeology\", \"multidisciplinary... \n",
"3 [\"multidisciplinary\"] \n",
"4 [\"multidisciplinary\"] \n",
"\n",
" repository_metadata.content_types \\\n",
"0 [bibliographic_references, theses_and_disserta... \n",
"1 [unpub_reports_and_working_papers] \n",
"2 [journal_articles, theses_and_dissertations, u... \n",
"3 [journal_articles, conference_and_workshop_pap... \n",
"4 [journal_articles, conference_and_workshop_pap... \n",
"\n",
" organization \\\n",
"0 [{'name': 'university of hong kong', 'alternat... \n",
"1 [{'name': 'central european university', 'alte... \n",
"2 [{'name': 'european university institute', 'al... \n",
"3 [{'name': 'uhasselt', 'alternativeName': 'hass... \n",
"4 [{'name': 'university of utrecht', 'alternativ... \n",
"\n",
" policy_urls \\\n",
"0 [] \n",
"1 [] \n",
"2 [{\"policy_url\": \"https://www.eui.eu/research/e... \n",
"3 [] \n",
"4 [] \n", "4 [] \n",
"\n", "\n",
" repository_metadata.software \\\n", " repository_metadata.url repository_metadata.description \\\n",
"0 {\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap... \n", "0 https://eldorado.tu-dortmund.de NaN \n",
"1 {\"name\": \"eprints\", \"version\": \"2.2.1\"} \n", "1 https://archivesic.ccsd.cnrs.fr NaN \n",
"2 {\"name\": \"dspace\", \"version\": \"5.2\"} \n", "2 http://digitalcommons.library.tmc.edu/ NaN \n",
"3 {\"name\": \"dspace\", \"version\": \"1.7.2\"} \n", "3 http://cogprints.org/ NaN \n",
"4 {\"name\": \"dspace\", \"version\": \"\"} \n", "4 http://digitalcommons.carleton.edu/ NaN \n",
"\n", "\n",
" repository_metadata.oai_url \\\n", " repository_metadata.type repository_metadata.content_languages \\\n",
"0 NaN \n", "0 institutional [] \n",
"1 http://rss.archives.ceu.hu/perl/oai2 \n", "1 institutional [] \n",
"2 http://cadmus.eui.eu/oai/request \n", "2 institutional [] \n",
"3 http://doclib.uhasselt.be/dspace-oai/request \n", "3 disciplinary [] \n",
"4 https://dspace.library.uu.nl/oai/request \n", "4 institutional [] \n",
"\n", "\n",
" system_metadata.publicly_visible repository_metadata.repository_status \\\n", " system_metadata.date_modified system_metadata.date_created \\\n",
"0 yes fully_functional \n", "0 2022-01-12 15:34:54 2005-12-19 14:57:52 \n",
"1 yes fully_functional \n", "1 2022-01-12 15:34:53 2006-01-13 12:48:32 \n",
"2 yes fully_functional \n", "2 2022-01-12 15:34:53 2006-02-14 11:16:12 \n",
"3 yes fully_functional \n", "3 2022-01-12 15:34:53 2006-01-04 15:01:23 \n",
"4 yes fully_functional \n", "4 2022-01-12 15:34:53 2006-01-04 16:07:58 \n",
"\n",
" repository_metadata.content_subjects \\\n",
"0 [arts, humanities, science, mathematics, socia... \n",
"1 [arts, science, technology, engineering, mathe... \n",
"2 [health and medicine] \n",
"3 [humanities, health and medicine, science, soc... \n",
"4 [humanities, science, social sciences] \n",
"\n",
" repository_metadata.content_types \\\n",
"0 [journal_articles, conference_and_workshop_pap... \n",
"1 [journal_articles, conference_and_workshop_pap... \n",
"2 [journal_articles, theses_and_dissertations] \n",
"3 [journal_articles, conference_and_workshop_pap... \n",
"4 [journal_articles, unpub_reports_and_working_p... \n",
"\n",
" organization policy_urls \\\n",
"0 [{'name': 'technische universität dortmund', '... [] \n",
"1 [{'name': 'centre pour la communication scient... [] \n",
"2 [{'name': 'texas medical center', 'alternative... [] \n",
"3 [{'name': 'university of southampton', 'altern... [] \n",
"4 [{'name': 'carleton college', 'alternativeName... [] \n",
"\n",
" repository_metadata.software \\\n",
"0 {\"name\": \"dspace\", \"version\": \"\"} \n",
"1 {\"name\": \"hal\", \"version\": \"\"} \n",
"2 {\"name\": \"other\", \"version\": \"\"} \n",
"3 {\"name\": \"eprints\", \"version\": \"\"} \n",
"4 {\"name\": \"other\", \"version\": \"\"} \n",
"\n",
" repository_metadata.oai_url \\\n",
"0 https://eldorado.tu-dortmund.de/oai/request \n",
"1 https://api.archives-ouvertes.fr/oai/archivesic \n",
"2 http://digitalcommons.library.tmc.edu/do/oai/ \n",
"3 http://cogprints.org/cgi/oai2 \n",
"4 NaN \n",
"\n",
" system_metadata.publicly_visible repository_metadata.repository_status \\\n",
"0 yes NaN \n",
"1 yes NaN \n",
"2 yes NaN \n",
"3 yes NaN \n",
"4 yes NaN \n",
"\n", "\n",
" repository_metadata.fulltext_record_count \\\n", " repository_metadata.fulltext_record_count \\\n",
"0 NaN \n", "0 9629.0 \n",
"1 NaN \n", "1 55492.0 \n",
"2 3867.0 \n", "2 2658.0 \n",
"3 0.0 \n", "3 2895.0 \n",
"4 1686.0 \n", "4 NaN \n",
"\n", "\n",
" repository_metadata.metadata_record_count \n", " repository_metadata.metadata_record_count \n",
"0 11850.0 \n", "0 20963.0 \n",
"1 164.0 \n", "1 1137498.0 \n",
"2 24869.0 \n", "2 7268.0 \n",
"3 27376.0 \n", "3 4277.0 \n",
"4 185637.0 " "4 42.0 "
] ]
}, },
"execution_count": 2, "execution_count": 2,
@ -404,67 +397,67 @@
" <tbody>\n", " <tbody>\n",
" <tr>\n", " <tr>\n",
" <th>count</th>\n", " <th>count</th>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>2147</td>\n", " <td>2155</td>\n",
" <td>5742</td>\n", " <td>5810</td>\n",
" <td>5421</td>\n", " <td>0.0</td>\n",
" <td>5742</td>\n", " <td>5810</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5742</td>\n", " <td>5644</td>\n",
" <td>5598</td>\n", " <td>5667</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>4402</td>\n", " <td>4447</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5595</td>\n", " <td>0.0</td>\n",
" <td>2.299000e+03</td>\n", " <td>2.292000e+03</td>\n",
" <td>4.197000e+03</td>\n", " <td>4.184000e+03</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>unique</th>\n", " <th>unique</th>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5713</td>\n", " <td>5780</td>\n",
" <td>2107</td>\n", " <td>2115</td>\n",
" <td>5705</td>\n", " <td>5772</td>\n",
" <td>4619</td>\n", " <td>NaN</td>\n",
" <td>4</td>\n", " <td>4</td>\n",
" <td>330</td>\n",
" <td>2372</td>\n",
" <td>5573</td>\n",
" <td>821</td>\n",
" <td>477</td>\n",
" <td>5201</td>\n",
" <td>642</td>\n",
" <td>321</td>\n",
" <td>4370</td>\n",
" <td>1</td>\n", " <td>1</td>\n",
" <td>7</td>\n", " <td>171</td>\n",
" <td>5643</td>\n",
" <td>236</td>\n",
" <td>476</td>\n",
" <td>5212</td>\n",
" <td>678</td>\n",
" <td>32</td>\n",
" <td>4415</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>top</th>\n", " <th>top</th>\n",
" <td>175</td>\n", " <td>134</td>\n",
" <td>{\"name\": \"hiroshima associated repository port...</td>\n", " <td>{\"name\": \"arch\", \"language\": \"en\"}</td>\n",
" <td>[{'acronym': 'aura'}]</td>\n", " <td>[{'acronym': 'aura'}]</td>\n",
" <td>http://harp.lib.hiroshima-u.ac.jp/</td>\n", " <td>http://harp.lib.hiroshima-u.ac.jp/</td>\n",
" <td>this site provides access to the research outp...</td>\n", " <td>NaN</td>\n",
" <td>institutional</td>\n", " <td>institutional</td>\n",
" <td>[\"en\"]</td>\n", " <td>[]</td>\n",
" <td>2022-01-12 15:35:47</td>\n",
" <td>2020-09-18 12:53:48</td>\n", " <td>2020-09-18 12:53:48</td>\n",
" <td>2020-09-18 12:53:48</td>\n", " <td>[science, technology, engineering, mathematics...</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[theses_and_dissertations]</td>\n", " <td>[theses_and_dissertations]</td>\n",
" <td>[{'name': 'rijksuniversiteit groningen', 'alte...</td>\n", " <td>[{'name': 'rijksuniversiteit groningen', 'alte...</td>\n",
" <td>[]</td>\n", " <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n", " <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n",
" <td>https://kidoks.bsz-bw.de/oai</td>\n", " <td>https://api.figshare.com/v2/oai</td>\n",
" <td>yes</td>\n", " <td>yes</td>\n",
" <td>fully_functional</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
@ -474,19 +467,19 @@
" <td>3</td>\n", " <td>3</td>\n",
" <td>4</td>\n", " <td>4</td>\n",
" <td>3</td>\n", " <td>3</td>\n",
" <td>95</td>\n", " <td>NaN</td>\n",
" <td>5096</td>\n", " <td>5161</td>\n",
" <td>1917</td>\n", " <td>5811</td>\n",
" <td>82</td>\n", " <td>73</td>\n",
" <td>82</td>\n", " <td>81</td>\n",
" <td>3227</td>\n", " <td>3321</td>\n",
" <td>465</td>\n", " <td>469</td>\n",
" <td>26</td>\n", " <td>26</td>\n",
" <td>5098</td>\n", " <td>5131</td>\n",
" <td>822</td>\n", " <td>2273</td>\n",
" <td>3</td>\n", " <td>3</td>\n",
" <td>5742</td>\n", " <td>5811</td>\n",
" <td>5276</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" </tr>\n", " </tr>\n",
@ -509,8 +502,8 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>5.010186e+03</td>\n", " <td>5.022890e+03</td>\n",
" <td>1.760546e+05</td>\n", " <td>1.765556e+05</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>std</th>\n", " <th>std</th>\n",
@ -531,8 +524,8 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>4.206295e+04</td>\n", " <td>4.212648e+04</td>\n",
" <td>6.600825e+06</td>\n", " <td>6.611068e+06</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>min</th>\n", " <th>min</th>\n",
@ -576,7 +569,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>0.000000e+00</td>\n", " <td>0.000000e+00</td>\n",
" <td>8.950000e+02</td>\n", " <td>8.937500e+02</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>50%</th>\n", " <th>50%</th>\n",
@ -597,8 +590,8 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>4.220000e+02</td>\n", " <td>4.225000e+02</td>\n",
" <td>4.026000e+03</td>\n", " <td>4.012500e+03</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>75%</th>\n", " <th>75%</th>\n",
@ -619,8 +612,8 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>2.930500e+03</td>\n", " <td>2.931500e+03</td>\n",
" <td>1.630400e+04</td>\n", " <td>1.629350e+04</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>max</th>\n", " <th>max</th>\n",
@ -649,22 +642,22 @@
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" system_metadata.id repository_metadata.name \\\n", " system_metadata.id repository_metadata.name \\\n",
"count 5742 5742 \n", "count 5811 5811 \n",
"unique 5742 5713 \n", "unique 5811 5780 \n",
"top 175 {\"name\": \"hiroshima associated repository port... \n", "top 134 {\"name\": \"arch\", \"language\": \"en\"} \n",
"freq 1 3 \n", "freq 1 3 \n",
"mean NaN NaN \n", "mean NaN NaN \n",
"std NaN NaN \n", "std NaN NaN \n",
"min NaN NaN \n", "min NaN NaN \n",
"25% NaN NaN \n", "25% NaN NaN \n",
"50% NaN NaN \n", "50% NaN NaN \n",
"75% NaN NaN \n", "75% NaN NaN \n",
"max NaN NaN \n", "max NaN NaN \n",
"\n", "\n",
" repository_metadata.alternativename \\\n", " repository_metadata.alternativename \\\n",
"count 2147 \n", "count 2155 \n",
"unique 2107 \n", "unique 2115 \n",
"top [{'acronym': 'aura'}] \n", "top [{'acronym': 'aura'}] \n",
"freq 4 \n", "freq 4 \n",
"mean NaN \n", "mean NaN \n",
@ -675,37 +668,24 @@
"75% NaN \n", "75% NaN \n",
"max NaN \n", "max NaN \n",
"\n", "\n",
" repository_metadata.url \\\n", " repository_metadata.url repository_metadata.description \\\n",
"count 5742 \n", "count 5810 0.0 \n",
"unique 5705 \n", "unique 5772 NaN \n",
"top http://harp.lib.hiroshima-u.ac.jp/ \n", "top http://harp.lib.hiroshima-u.ac.jp/ NaN \n",
"freq 3 \n", "freq 3 NaN \n",
"mean NaN \n", "mean NaN NaN \n",
"std NaN \n", "std NaN NaN \n",
"min NaN \n", "min NaN NaN \n",
"25% NaN \n", "25% NaN NaN \n",
"50% NaN \n", "50% NaN NaN \n",
"75% NaN \n", "75% NaN NaN \n",
"max NaN \n", "max NaN NaN \n",
"\n",
" repository_metadata.description \\\n",
"count 5421 \n",
"unique 4619 \n",
"top this site provides access to the research outp... \n",
"freq 95 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n", "\n",
" repository_metadata.type repository_metadata.content_languages \\\n", " repository_metadata.type repository_metadata.content_languages \\\n",
"count 5742 5742 \n", "count 5810 5811 \n",
"unique 4 330 \n", "unique 4 1 \n",
"top institutional [\"en\"] \n", "top institutional [] \n",
"freq 5096 1917 \n", "freq 5161 5811 \n",
"mean NaN NaN \n", "mean NaN NaN \n",
"std NaN NaN \n", "std NaN NaN \n",
"min NaN NaN \n", "min NaN NaN \n",
@ -715,10 +695,10 @@
"max NaN NaN \n", "max NaN NaN \n",
"\n", "\n",
" system_metadata.date_modified system_metadata.date_created \\\n", " system_metadata.date_modified system_metadata.date_created \\\n",
"count 5742 5742 \n", "count 5811 5811 \n",
"unique 2372 5573 \n", "unique 171 5643 \n",
"top 2020-09-18 12:53:48 2020-09-18 12:53:48 \n", "top 2022-01-12 15:35:47 2020-09-18 12:53:48 \n",
"freq 82 82 \n", "freq 73 81 \n",
"mean NaN NaN \n", "mean NaN NaN \n",
"std NaN NaN \n", "std NaN NaN \n",
"min NaN NaN \n", "min NaN NaN \n",
@ -727,24 +707,37 @@
"75% NaN NaN \n", "75% NaN NaN \n",
"max NaN NaN \n", "max NaN NaN \n",
"\n", "\n",
" repository_metadata.content_subjects repository_metadata.content_types \\\n", " repository_metadata.content_subjects \\\n",
"count 5742 5598 \n", "count 5644 \n",
"unique 821 477 \n", "unique 236 \n",
"top [\"multidisciplinary\"] [theses_and_dissertations] \n", "top [science, technology, engineering, mathematics... \n",
"freq 3227 465 \n", "freq 3321 \n",
"mean NaN NaN \n", "mean NaN \n",
"std NaN NaN \n", "std NaN \n",
"min NaN NaN \n", "min NaN \n",
"25% NaN NaN \n", "25% NaN \n",
"50% NaN NaN \n", "50% NaN \n",
"75% NaN NaN \n", "75% NaN \n",
"max NaN NaN \n", "max NaN \n",
"\n",
" repository_metadata.content_types \\\n",
"count 5667 \n",
"unique 476 \n",
"top [theses_and_dissertations] \n",
"freq 469 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n", "\n",
" organization policy_urls \\\n", " organization policy_urls \\\n",
"count 5742 5742 \n", "count 5811 5811 \n",
"unique 5201 642 \n", "unique 5212 678 \n",
"top [{'name': 'rijksuniversiteit groningen', 'alte... [] \n", "top [{'name': 'rijksuniversiteit groningen', 'alte... [] \n",
"freq 26 5098 \n", "freq 26 5131 \n",
"mean NaN NaN \n", "mean NaN NaN \n",
"std NaN NaN \n", "std NaN NaN \n",
"min NaN NaN \n", "min NaN NaN \n",
@ -753,56 +746,69 @@
"75% NaN NaN \n", "75% NaN NaN \n",
"max NaN NaN \n", "max NaN NaN \n",
"\n", "\n",
" repository_metadata.software repository_metadata.oai_url \\\n", " repository_metadata.software repository_metadata.oai_url \\\n",
"count 5742 4402 \n", "count 5811 4447 \n",
"unique 321 4370 \n", "unique 32 4415 \n",
"top {\"name\": \"dspace\", \"version\": \"\"} https://kidoks.bsz-bw.de/oai \n", "top {\"name\": \"dspace\", \"version\": \"\"} https://api.figshare.com/v2/oai \n",
"freq 822 3 \n", "freq 2273 3 \n",
"mean NaN NaN \n", "mean NaN NaN \n",
"std NaN NaN \n", "std NaN NaN \n",
"min NaN NaN \n", "min NaN NaN \n",
"25% NaN NaN \n", "25% NaN NaN \n",
"50% NaN NaN \n", "50% NaN NaN \n",
"75% NaN NaN \n", "75% NaN NaN \n",
"max NaN NaN \n", "max NaN NaN \n",
"\n", "\n",
" system_metadata.publicly_visible repository_metadata.repository_status \\\n", " system_metadata.publicly_visible \\\n",
"count 5742 5595 \n", "count 5811 \n",
"unique 1 7 \n", "unique 1 \n",
"top yes fully_functional \n", "top yes \n",
"freq 5742 5276 \n", "freq 5811 \n",
"mean NaN NaN \n", "mean NaN \n",
"std NaN NaN \n", "std NaN \n",
"min NaN NaN \n", "min NaN \n",
"25% NaN NaN \n", "25% NaN \n",
"50% NaN NaN \n", "50% NaN \n",
"75% NaN NaN \n", "75% NaN \n",
"max NaN NaN \n", "max NaN \n",
"\n",
" repository_metadata.repository_status \\\n",
"count 0.0 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n", "\n",
" repository_metadata.fulltext_record_count \\\n", " repository_metadata.fulltext_record_count \\\n",
"count 2.299000e+03 \n", "count 2.292000e+03 \n",
"unique NaN \n", "unique NaN \n",
"top NaN \n", "top NaN \n",
"freq NaN \n", "freq NaN \n",
"mean 5.010186e+03 \n", "mean 5.022890e+03 \n",
"std 4.206295e+04 \n", "std 4.212648e+04 \n",
"min 0.000000e+00 \n", "min 0.000000e+00 \n",
"25% 0.000000e+00 \n", "25% 0.000000e+00 \n",
"50% 4.220000e+02 \n", "50% 4.225000e+02 \n",
"75% 2.930500e+03 \n", "75% 2.931500e+03 \n",
"max 1.817531e+06 \n", "max 1.817531e+06 \n",
"\n", "\n",
" repository_metadata.metadata_record_count \n", " repository_metadata.metadata_record_count \n",
"count 4.197000e+03 \n", "count 4.184000e+03 \n",
"unique NaN \n", "unique NaN \n",
"top NaN \n", "top NaN \n",
"freq NaN \n", "freq NaN \n",
"mean 1.760546e+05 \n", "mean 1.765556e+05 \n",
"std 6.600825e+06 \n", "std 6.611068e+06 \n",
"min 0.000000e+00 \n", "min 0.000000e+00 \n",
"25% 8.950000e+02 \n", "25% 8.937500e+02 \n",
"50% 4.026000e+03 \n", "50% 4.012500e+03 \n",
"75% 1.630400e+04 \n", "75% 1.629350e+04 \n",
"max 4.200000e+08 " "max 4.200000e+08 "
] ]
}, },
@ -825,23 +831,23 @@
"text/plain": [ "text/plain": [
"system_metadata.id 0\n", "system_metadata.id 0\n",
"repository_metadata.name 0\n", "repository_metadata.name 0\n",
"repository_metadata.alternativename 3595\n", "repository_metadata.alternativename 3656\n",
"repository_metadata.url 0\n", "repository_metadata.url 1\n",
"repository_metadata.description 321\n", "repository_metadata.description 5811\n",
"repository_metadata.type 0\n", "repository_metadata.type 1\n",
"repository_metadata.content_languages 0\n", "repository_metadata.content_languages 0\n",
"system_metadata.date_modified 0\n", "system_metadata.date_modified 0\n",
"system_metadata.date_created 0\n", "system_metadata.date_created 0\n",
"repository_metadata.content_subjects 0\n", "repository_metadata.content_subjects 167\n",
"repository_metadata.content_types 144\n", "repository_metadata.content_types 144\n",
"organization 0\n", "organization 0\n",
"policy_urls 0\n", "policy_urls 0\n",
"repository_metadata.software 0\n", "repository_metadata.software 0\n",
"repository_metadata.oai_url 1340\n", "repository_metadata.oai_url 1364\n",
"system_metadata.publicly_visible 0\n", "system_metadata.publicly_visible 0\n",
"repository_metadata.repository_status 147\n", "repository_metadata.repository_status 5811\n",
"repository_metadata.fulltext_record_count 3443\n", "repository_metadata.fulltext_record_count 3519\n",
"repository_metadata.metadata_record_count 1545\n", "repository_metadata.metadata_record_count 1627\n",
"dtype: int64" "dtype: int64"
] ]
}, },
@ -863,17 +869,17 @@
"data": { "data": {
"text/plain": [ "text/plain": [
"repository_metadata.content_types\n", "repository_metadata.content_types\n",
"bibliographic_references 865\n", "bibliographic_references 858\n",
"books_chapters_and_sections 2194\n", "books_chapters_and_sections 2246\n",
"conference_and_workshop_papers 1981\n", "conference_and_workshop_papers 2037\n",
"datasets 401\n", "datasets 427\n",
"journal_articles 4030\n", "journal_articles 4069\n",
"learning_objects 789\n", "learning_objects 807\n",
"other_special_item_types 1759\n", "other_special_item_types 1800\n",
"patents 182\n", "patents 200\n",
"software 92\n", "software 105\n",
"theses_and_dissertations 3319\n", "theses_and_dissertations 3377\n",
"unpub_reports_and_working_papers 1904\n", "unpub_reports_and_working_papers 1953\n",
"dtype: int64" "dtype: int64"
] ]
}, },
@ -896,7 +902,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -30,7 +30,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -496,7 +496,7 @@
"4 NaN " "4 NaN "
] ]
}, },
"execution_count": 6, "execution_count": 2,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -515,7 +515,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1363,7 +1363,7 @@
"max NaN " "max NaN "
] ]
}, },
"execution_count": 7, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1374,7 +1374,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1420,7 +1420,7 @@
"dtype: int64" "dtype: int64"
] ]
}, },
"execution_count": 8, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1431,7 +1431,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1444,7 +1444,7 @@
"dtype: int64" "dtype: int64"
] ]
}, },
"execution_count": 12, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1463,7 +1463,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },

File diff suppressed because one or more lines are too long

View File

@ -15289,7 +15289,7 @@
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },