diff --git a/notebooks/03-overlap.ipynb b/notebooks/03-overlap.ipynb
index 9a81185..1fa3496 100644
--- a/notebooks/03-overlap.ipynb
+++ b/notebooks/03-overlap.ipynb
@@ -654,9 +654,9 @@
"
NaN | \n",
" NaN | \n",
" [\"eng\"] | \n",
- " [1 Humanities and Social Sciences, 111 Social ... | \n",
+ " [{'name': '1 Humanities and Social Sciences', ... | \n",
" NaN | \n",
- " [Databases, Plain text, Scientific and statist... | \n",
+ " [{'name': 'Databases', 'scheme': 'parse'}, {'n... | \n",
" [dataProvider] | \n",
" [FAIR, Middle East, crime, demography, economy... | \n",
" [{'institutionName': 'Odum Institute for Resea... | \n",
@@ -669,7 +669,7 @@
" [] | \n",
" [\"DataVerse\"] | \n",
" NaN | \n",
- " [] | \n",
+ " {} | \n",
" [\"DOI\"] | \n",
" NaN | \n",
" [] | \n",
@@ -699,9 +699,9 @@
" 1985 | \n",
" NaN | \n",
" [\"eng\", \"spa\"] | \n",
- " [1 Humanities and Social Sciences, 102 History... | \n",
+ " [{'name': '1 Humanities and Social Sciences', ... | \n",
" https://www.archives.gov/publications/general-... | \n",
- " [Images, Standard office documents, Structured... | \n",
+ " [{'name': 'Images', 'scheme': 'parse'}, {'name... | \n",
" [dataProvider] | \n",
" [US History] | \n",
" [{'institutionName': 'The U.S. National Archiv... | \n",
@@ -714,7 +714,7 @@
" [] | \n",
" [\"unknown\"] | \n",
" no | \n",
- " [\"https://www.archives.gov/developer#toc-appli... | \n",
+ " {\"api\": \"https://www.archives.gov/developer#to... | \n",
" [\"none\"] | \n",
" https://aad.archives.gov/aad/help/getting-star... | \n",
" [] | \n",
@@ -744,9 +744,9 @@
" 2012 | \n",
" NaN | \n",
" [\"deu\"] | \n",
- " [1 Humanities and Social Sciences, 104 Linguis... | \n",
+ " [{'name': '1 Humanities and Social Sciences', ... | \n",
" https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext... | \n",
- " [Audiovisual data, Standard office documents, ... | \n",
+ " [{'name': 'Audiovisual data', 'scheme': 'parse... | \n",
" [dataProvider, serviceProvider] | \n",
" [Australian German, FOLK, German dialects, Pfe... | \n",
" [{'institutionName': 'Institut für Deutsche Sp... | \n",
@@ -759,7 +759,7 @@
" [] | \n",
" [\"other\"] | \n",
" yes | \n",
- " [] | \n",
+ " {} | \n",
" [\"none\"] | \n",
" http://agd.ids-mannheim.de/konditionen.shtml | \n",
" [] | \n",
@@ -789,9 +789,9 @@
" 2011 | \n",
" NaN | \n",
" [\"eng\"] | \n",
- " [1 Humanities and Social Sciences, 111 Social ... | \n",
+ " [{'name': '1 Humanities and Social Sciences', ... | \n",
" https://odum.unc.edu/about/mission-vision/ | \n",
- " [Archived data, Plain text, Raw data, Scientif... | \n",
+ " [{'name': 'Archived data', 'scheme': 'parse'},... | \n",
" [dataProvider, serviceProvider] | \n",
" [FAIR, census, demographic survey, demography,... | \n",
" [{'institutionName': 'Odum Institute for Resea... | \n",
@@ -804,7 +804,7 @@
" [{\"dataUploadLicenseName\": \"Data Deposit Form\"... | \n",
" [\"DataVerse\"] | \n",
" yes | \n",
- " [\"https://guides.dataverse.org/en/latest/api/n... | \n",
+ " {\"api\": \"https://guides.dataverse.org/en/lates... | \n",
" [\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"] | \n",
" https://dataverse.org/best-practices/data-cita... | \n",
" [] | \n",
@@ -834,9 +834,9 @@
" 1996-10-01 | \n",
" NaN | \n",
" [\"eng\"] | \n",
- " [1 Humanities and Social Sciences, 101 Ancient... | \n",
+ " [{'name': '1 Humanities and Social Sciences', ... | \n",
" https://archaeologydataservice.ac.uk/about/our... | \n",
- " [Archived data, Audiovisual data, Databases, I... | \n",
+ " [{'name': 'Archived data', 'scheme': 'parse'},... | \n",
" [dataProvider, serviceProvider] | \n",
" [FAIR, archaeology, cultural heritage, prehist... | \n",
" [{'institutionName': 'Arts and Humanities Rese... | \n",
@@ -849,7 +849,7 @@
" [{\"dataUploadLicenseName\": \"Guidelines for Dep... | \n",
" [\"other\"] | \n",
" yes | \n",
- " [\"https://archaeologydataservice.ac.uk/about/e... | \n",
+ " {\"api\": \"https://archaeologydataservice.ac.uk/... | \n",
" [\"DOI\"] | \n",
" https://archaeologydataservice.ac.uk/advice/te... | \n",
" [] | \n",
@@ -939,11 +939,11 @@
"4 NaN [\"eng\"] \n",
"\n",
" re3data_subject \\\n",
- "0 [1 Humanities and Social Sciences, 111 Social ... \n",
- "1 [1 Humanities and Social Sciences, 102 History... \n",
- "2 [1 Humanities and Social Sciences, 104 Linguis... \n",
- "3 [1 Humanities and Social Sciences, 111 Social ... \n",
- "4 [1 Humanities and Social Sciences, 101 Ancient... \n",
+ "0 [{'name': '1 Humanities and Social Sciences', ... \n",
+ "1 [{'name': '1 Humanities and Social Sciences', ... \n",
+ "2 [{'name': '1 Humanities and Social Sciences', ... \n",
+ "3 [{'name': '1 Humanities and Social Sciences', ... \n",
+ "4 [{'name': '1 Humanities and Social Sciences', ... \n",
"\n",
" re3data_missionStatementURL \\\n",
"0 NaN \n",
@@ -953,11 +953,11 @@
"4 https://archaeologydataservice.ac.uk/about/our... \n",
"\n",
" re3data_contentType \\\n",
- "0 [Databases, Plain text, Scientific and statist... \n",
- "1 [Images, Standard office documents, Structured... \n",
- "2 [Audiovisual data, Standard office documents, ... \n",
- "3 [Archived data, Plain text, Raw data, Scientif... \n",
- "4 [Archived data, Audiovisual data, Databases, I... \n",
+ "0 [{'name': 'Databases', 'scheme': 'parse'}, {'n... \n",
+ "1 [{'name': 'Images', 'scheme': 'parse'}, {'name... \n",
+ "2 [{'name': 'Audiovisual data', 'scheme': 'parse... \n",
+ "3 [{'name': 'Archived data', 'scheme': 'parse'},... \n",
+ "4 [{'name': 'Archived data', 'scheme': 'parse'},... \n",
"\n",
" re3data_providerType \\\n",
"0 [dataProvider] \n",
@@ -1023,11 +1023,11 @@
"4 [{\"dataUploadLicenseName\": \"Guidelines for Dep... [\"other\"] \n",
"\n",
" re3data_versioning re3data_api \\\n",
- "0 NaN [] \n",
- "1 no [\"https://www.archives.gov/developer#toc-appli... \n",
- "2 yes [] \n",
- "3 yes [\"https://guides.dataverse.org/en/latest/api/n... \n",
- "4 yes [\"https://archaeologydataservice.ac.uk/about/e... \n",
+ "0 NaN {} \n",
+ "1 no {\"api\": \"https://www.archives.gov/developer#to... \n",
+ "2 yes {} \n",
+ "3 yes {\"api\": \"https://guides.dataverse.org/en/lates... \n",
+ "4 yes {\"api\": \"https://archaeologydataservice.ac.uk/... \n",
"\n",
" re3data_pidSystem \\\n",
"0 [\"DOI\"] \n",
@@ -1136,13 +1136,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" \n",
" \n",
@@ -1158,13 +1161,16 @@
" [\"zh\", \"en\"] | \n",
" 2021-03-25 10:16:18 | \n",
" 2005-12-21 12:44:08 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [bibliographic_references, theses_and_disserta... | \n",
" [{'name': 'university of hong kong', 'alternat... | \n",
" [] | \n",
" {\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap... | \n",
" NaN | \n",
" yes | \n",
+ " fully_functional | \n",
+ " NaN | \n",
+ " 11850.0 | \n",
" OpenDOAR_175 | \n",
" \n",
" \n",
@@ -1178,13 +1184,16 @@
" [\"cs\", \"en\", \"hu\", \"ru\"] | \n",
" 2021-03-25 09:48:31 | \n",
" 2006-01-04 14:59:30 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [unpub_reports_and_working_papers] | \n",
" [{'name': 'central european university', 'alte... | \n",
" [] | \n",
" {\"name\": \"eprints\", \"version\": \"2.2.1\"} | \n",
" http://rss.archives.ceu.hu/perl/oai2 | \n",
" yes | \n",
+ " fully_functional | \n",
+ " NaN | \n",
+ " 164.0 | \n",
" OpenDOAR_64 | \n",
"
\n",
" \n",
@@ -1198,13 +1207,16 @@
" [\"nl\", \"en\", \"fr\", \"de\", \"it\"] | \n",
" 2021-09-13 13:35:36 | \n",
" 2006-01-04 12:07:07 | \n",
- " [history and archaeology, multidisciplinary, s... | \n",
+ " [\"history and archaeology\", \"multidisciplinary... | \n",
" [journal_articles, theses_and_dissertations, u... | \n",
" [{'name': 'european university institute', 'al... | \n",
" [{\"policy_url\": \"https://www.eui.eu/research/e... | \n",
" {\"name\": \"dspace\", \"version\": \"5.2\"} | \n",
" http://cadmus.eui.eu/oai/request | \n",
" yes | \n",
+ " fully_functional | \n",
+ " 3867.0 | \n",
+ " 24869.0 | \n",
" OpenDOAR_151 | \n",
"
\n",
" \n",
@@ -1218,13 +1230,16 @@
" [\"nl\", \"en\", \"fr\", \"de\"] | \n",
" 2021-04-16 15:23:52 | \n",
" 2006-01-24 15:46:44 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [journal_articles, conference_and_workshop_pap... | \n",
" [{'name': 'uhasselt', 'alternativeName': 'hass... | \n",
" [] | \n",
" {\"name\": \"dspace\", \"version\": \"1.7.2\"} | \n",
" http://doclib.uhasselt.be/dspace-oai/request | \n",
" yes | \n",
+ " fully_functional | \n",
+ " 0.0 | \n",
+ " 27376.0 | \n",
" OpenDOAR_105 | \n",
"
\n",
" \n",
@@ -1238,13 +1253,16 @@
" [\"nl\", \"en\"] | \n",
" 2021-04-16 15:22:03 | \n",
" 2006-01-13 12:55:13 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [journal_articles, conference_and_workshop_pap... | \n",
" [{'name': 'university of utrecht', 'alternativ... | \n",
" [] | \n",
" {\"name\": \"dspace\", \"version\": \"\"} | \n",
" https://dspace.library.uu.nl/oai/request | \n",
" yes | \n",
+ " fully_functional | \n",
+ " 1686.0 | \n",
+ " 185637.0 | \n",
" OpenDOAR_101 | \n",
"
\n",
" \n",
@@ -1315,12 +1333,12 @@
"3 2006-01-24 15:46:44 \n",
"4 2006-01-13 12:55:13 \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 [multidisciplinary] \n",
- "1 [multidisciplinary] \n",
- "2 [history and archaeology, multidisciplinary, s... \n",
- "3 [multidisciplinary] \n",
- "4 [multidisciplinary] \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 [\"multidisciplinary\"] \n",
+ "1 [\"multidisciplinary\"] \n",
+ "2 [\"history and archaeology\", \"multidisciplinary... \n",
+ "3 [\"multidisciplinary\"] \n",
+ "4 [\"multidisciplinary\"] \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [bibliographic_references, theses_and_disserta... \n",
@@ -1357,12 +1375,33 @@
"3 http://doclib.uhasselt.be/dspace-oai/request \n",
"4 https://dspace.library.uu.nl/oai/request \n",
"\n",
- " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id \n",
- "0 yes OpenDOAR_175 \n",
- "1 yes OpenDOAR_64 \n",
- "2 yes OpenDOAR_151 \n",
- "3 yes OpenDOAR_105 \n",
- "4 yes OpenDOAR_101 "
+ " OpenDOAR_system_metadata.publicly_visible \\\n",
+ "0 yes \n",
+ "1 yes \n",
+ "2 yes \n",
+ "3 yes \n",
+ "4 yes \n",
+ "\n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 fully_functional \n",
+ "1 fully_functional \n",
+ "2 fully_functional \n",
+ "3 fully_functional \n",
+ "4 fully_functional \n",
+ "\n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 3867.0 \n",
+ "3 0.0 \n",
+ "4 1686.0 \n",
+ "\n",
+ " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \n",
+ "0 11850.0 OpenDOAR_175 \n",
+ "1 164.0 OpenDOAR_64 \n",
+ "2 24869.0 OpenDOAR_151 \n",
+ "3 27376.0 OpenDOAR_105 \n",
+ "4 185637.0 OpenDOAR_101 "
]
},
"execution_count": 4,
@@ -1553,8 +1592,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
- " [669, 58] | \n",
+ " [celestial, opendoar] | \n",
+ " [58, 669] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1632,7 +1671,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
+ " [celestial, opendoar] | \n",
" [526, 258] | \n",
" NaN | \n",
" NaN | \n",
@@ -1768,8 +1807,8 @@
" TRUE | \n",
" TRUE | \n",
" TRUE | \n",
- " [Climate Service Center 2.0, Helmholtz-Zentrum... | \n",
- " [http://www.hzg.de/, http://www.klimzug.de/de/... | \n",
+ " [Helmholtz-Zentrum Geesthacht, Climate Service... | \n",
+ " [http://www.hzg.de/, http://www.climateservice... | \n",
" de | \n",
" Hamburg | \n",
" 53.5511 | \n",
@@ -1777,7 +1816,7 @@
" opus | \n",
" geoname_2_DE | \n",
" other | \n",
- " [GE, GF, G1, S1, HD] | \n",
+ " [GE, S1, G1, GF, HD] | \n",
" 2015-07-02 08:08:31 | \n",
" NaN | \n",
" NaN | \n",
@@ -1790,7 +1829,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
+ " [celestial, opendoar] | \n",
" [3408, 5881] | \n",
" NaN | \n",
" NaN | \n",
@@ -1998,14 +2037,14 @@
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
- "3 [Climate Service Center 2.0, Helmholtz-Zentrum... \n",
+ "3 [Helmholtz-Zentrum Geesthacht, Climate Service... \n",
"4 Skidmore College \n",
"\n",
" roar_organisation_home_page roar_location_country \\\n",
"0 NaN fr \n",
"1 NaN se \n",
"2 NaN pt \n",
- "3 [http://www.hzg.de/, http://www.klimzug.de/de/... de \n",
+ "3 [http://www.hzg.de/, http://www.climateservice... de \n",
"4 http://www.skidmore.edu/ us \n",
"\n",
" roar_location_city roar_location_latitude roar_location_longitude \\\n",
@@ -2019,7 +2058,7 @@
"0 hal geoname_2_FR other NaN \n",
"1 diva geoname_2_SE other NaN \n",
"2 dspace geoname_2_PT other NaN \n",
- "3 opus geoname_2_DE other [GE, GF, G1, S1, HD] \n",
+ "3 opus geoname_2_DE other [GE, S1, G1, GF, HD] \n",
"4 bepress geoname_2_US other NaN \n",
"\n",
" roar_date roar_note roar_suggestions roar_activity_low \\\n",
@@ -2051,10 +2090,10 @@
"4 NaN NaN NaN \n",
"\n",
" roar_registry_name roar_registry_id roar_submit_to \\\n",
- "0 [opendoar, celestial] [669, 58] NaN \n",
- "1 [opendoar, celestial] [526, 258] NaN \n",
+ "0 [celestial, opendoar] [58, 669] NaN \n",
+ "1 [celestial, opendoar] [526, 258] NaN \n",
"2 NaN NaN NaN \n",
- "3 [opendoar, celestial] [3408, 5881] NaN \n",
+ "3 [celestial, opendoar] [3408, 5881] NaN \n",
"4 celestial 5882 NaN \n",
"\n",
" roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n",
@@ -2268,7 +2307,7 @@
" dspace | \n",
" geoname_2_UA | \n",
" other | \n",
- " [H1, L1, AC, D204, B1, D1, DK, BF, BS, HM, BL,... | \n",
+ " [D204, BS, BL, B1, D901, DK, H1, HM, L1, BR, A... | \n",
" 2015-07-07 12:38:37 | \n",
" NaN | \n",
" NaN | \n",
@@ -2281,7 +2320,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
+ " [celestial, opendoar] | \n",
" [3410, 5883] | \n",
" NaN | \n",
" NaN | \n",
@@ -2293,7 +2332,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [russell_group, ivy_league] | \n",
+ " [ivy_league, russell_group] | \n",
" roar_10013 | \n",
" \n",
" \n",
@@ -2350,7 +2389,7 @@
"7 NaN NaN dspace geoname_2_UA \n",
"\n",
" roar_version roar_subjects \\\n",
- "7 other [H1, L1, AC, D204, B1, D1, DK, BF, BS, HM, BL,... \n",
+ "7 other [D204, BS, BL, B1, D901, DK, H1, HM, L1, BR, A... \n",
"\n",
" roar_date roar_note roar_suggestions roar_activity_low \\\n",
"7 2015-07-07 12:38:37 NaN NaN NaN \n",
@@ -2362,7 +2401,7 @@
"7 NaN NaN NaN \n",
"\n",
" roar_fulltexts_rdocs roar_registry_name roar_registry_id roar_submit_to \\\n",
- "7 NaN [opendoar, celestial] [3410, 5883] NaN \n",
+ "7 NaN [celestial, opendoar] [3410, 5883] NaN \n",
"\n",
" roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n",
"7 NaN NaN NaN \n",
@@ -2374,7 +2413,7 @@
"7 NaN NaN NaN \n",
"\n",
" roar_total_deposits roar_association roar_unique_id \n",
- "7 NaN [russell_group, ivy_league] roar_10013 "
+ "7 NaN [ivy_league, russell_group] roar_10013 "
]
},
"execution_count": 6,
@@ -2752,7 +2791,7 @@
},
{
"cell_type": "code",
- "execution_count": 65,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -2810,7 +2849,7 @@
"sum 58 58 58 58 58"
]
},
- "execution_count": 65,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -2822,7 +2861,7 @@
},
{
"cell_type": "code",
- "execution_count": 64,
+ "execution_count": 15,
"metadata": {},
"outputs": [
{
@@ -2880,7 +2919,7 @@
"sum 6 6 6 6 6"
]
},
- "execution_count": 64,
+ "execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@@ -2892,7 +2931,7 @@
},
{
"cell_type": "code",
- "execution_count": 63,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -2950,7 +2989,7 @@
"sum 518 518 518 518 518"
]
},
- "execution_count": 63,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -2962,7 +3001,7 @@
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 17,
"metadata": {},
"outputs": [
{
@@ -2976,7 +3015,7 @@
"dtype: int64"
]
},
- "execution_count": 53,
+ "execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
@@ -2995,7 +3034,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 18,
"metadata": {},
"outputs": [
{
@@ -3136,7 +3175,7 @@
"dedup::03e0704b5690a2dee1861dc3ad3316c9 {roar} "
]
},
- "execution_count": 14,
+ "execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
@@ -3150,7 +3189,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 19,
"metadata": {},
"outputs": [
{
@@ -3291,7 +3330,7 @@
"dedup::03e0704b5690a2dee1861dc3ad3316c9 roar "
]
},
- "execution_count": 15,
+ "execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
@@ -3303,7 +3342,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 20,
"metadata": {},
"outputs": [
{
@@ -3379,7 +3418,7 @@
"roar 121 121 121 121 121"
]
},
- "execution_count": 16,
+ "execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
@@ -3390,7 +3429,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -3557,7 +3596,7 @@
"[287 rows x 6 columns]"
]
},
- "execution_count": 17,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -3576,7 +3615,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -3769,7 +3808,7 @@
"[440 rows x 6 columns]"
]
},
- "execution_count": 18,
+ "execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
@@ -3793,7 +3832,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
@@ -3986,7 +4025,7 @@
"[3890 rows x 6 columns]"
]
},
- "execution_count": 19,
+ "execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
@@ -4007,7 +4046,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -4022,7 +4061,7 @@
"dtype: int64"
]
},
- "execution_count": 20,
+ "execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
@@ -4033,7 +4072,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 25,
"metadata": {},
"outputs": [
{
@@ -4048,7 +4087,7 @@
"dtype: int64"
]
},
- "execution_count": 21,
+ "execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
@@ -4059,7 +4098,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
@@ -4068,7 +4107,7 @@
"2191"
]
},
- "execution_count": 22,
+ "execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@@ -4079,7 +4118,7 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": 27,
"metadata": {},
"outputs": [
{
@@ -4088,7 +4127,7 @@
"2191"
]
},
- "execution_count": 23,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@@ -4106,7 +4145,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 28,
"metadata": {},
"outputs": [
{
@@ -4225,13 +4264,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" roar_eprintid | \n",
" roar_rev_number | \n",
@@ -4417,6 +4459,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 8237 | \n",
" 17 | \n",
" archive | \n",
@@ -4599,6 +4644,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 2820 | \n",
" 525 | \n",
" archive | \n",
@@ -4639,7 +4687,7 @@
" TRUE | \n",
" TRUE | \n",
" FALSE | \n",
- " [USU Library, University of Sumatera Utara] | \n",
+ " [University of Sumatera Utara, USU Library] | \n",
" [http://library.usu.ac.id, http://www.usu.ac.id] | \n",
" id | \n",
" Medan | \n",
@@ -4661,8 +4709,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [roarmap, opendoar, celestial] | \n",
- " [283, 1717, 2101] | \n",
+ " [roarmap, celestial, opendoar] | \n",
+ " [1717, 2101, 283] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -4781,6 +4829,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 9487 | \n",
" 16 | \n",
" archive | \n",
@@ -4963,6 +5014,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 1241 | \n",
" 583 | \n",
" archive | \n",
@@ -5025,8 +5079,8 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
- " [opendoar, celestial] | \n",
- " [1779, 1627] | \n",
+ " [celestial, opendoar] | \n",
+ " [1627, 1779] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -5145,6 +5199,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 15142 | \n",
" 11 | \n",
" archive | \n",
@@ -5584,12 +5641,12 @@
"3 NaN \n",
"4 NaN \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 NaN \n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_types OpenDOAR_organization \\\n",
"0 NaN NaN \n",
@@ -5612,26 +5669,47 @@
"3 NaN \n",
"4 NaN \n",
"\n",
- " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id roar_eprintid \\\n",
- "0 NaN NaN 8237 \n",
- "1 NaN NaN 2820 \n",
- "2 NaN NaN 9487 \n",
- "3 NaN NaN 1241 \n",
- "4 NaN NaN 15142 \n",
+ " OpenDOAR_system_metadata.publicly_visible \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
- " roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n",
- "0 17 archive 5268 NaN NaN \n",
- "1 525 archive 65 NaN NaN \n",
- "2 16 archive 6458 NaN NaN \n",
- "3 583 archive 1 NaN NaN \n",
- "4 11 archive 12132 NaN NaN \n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
- " roar_dir roar_datestamp roar_lastmod \\\n",
- "0 disk0/00/00/82/37 2014-05-15 11:23:30 2014-05-19 05:42:47 \n",
- "1 disk0/00/00/28/20 2010-07-29 01:40:27 2012-01-19 11:37:49 \n",
- "2 disk0/00/00/94/87 2015-05-15 14:03:55 2016-03-21 20:21:02 \n",
- "3 disk0/00/00/12/41 2010-01-06 13:45:32 2011-07-18 05:57:23 \n",
- "4 disk0/00/01/51/42 2020-08-08 12:35:50 2021-01-25 22:45:10 \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n",
+ "0 8237 17 archive 5268 NaN \n",
+ "1 2820 525 archive 65 NaN \n",
+ "2 9487 16 archive 6458 NaN \n",
+ "3 1241 583 archive 1 NaN \n",
+ "4 15142 11 archive 12132 NaN \n",
+ "\n",
+ " roar_source roar_dir roar_datestamp roar_lastmod \\\n",
+ "0 NaN disk0/00/00/82/37 2014-05-15 11:23:30 2014-05-19 05:42:47 \n",
+ "1 NaN disk0/00/00/28/20 2010-07-29 01:40:27 2012-01-19 11:37:49 \n",
+ "2 NaN disk0/00/00/94/87 2015-05-15 14:03:55 2016-03-21 20:21:02 \n",
+ "3 NaN disk0/00/00/12/41 2010-01-06 13:45:32 2011-07-18 05:57:23 \n",
+ "4 NaN disk0/00/01/51/42 2020-08-08 12:35:50 2021-01-25 22:45:10 \n",
"\n",
" roar_status_changed roar_type roar_succeeds roar_commentary \\\n",
"0 2014-05-15 11:23:30 institutional NaN NaN \n",
@@ -5719,7 +5797,7 @@
"\n",
" roar_open_access roar_mandate roar_organisation_title \\\n",
"0 FALSE TRUE Università degli Studi di Milano \n",
- "1 TRUE FALSE [USU Library, University of Sumatera Utara] \n",
+ "1 TRUE FALSE [University of Sumatera Utara, USU Library] \n",
"2 TRUE FALSE National Research Foundation of South Africa \n",
"3 NaN NaN Swansea Metropolitan University \n",
"4 FALSE FALSE NaN \n",
@@ -5775,9 +5853,9 @@
"\n",
" roar_registry_name roar_registry_id \\\n",
"0 celestial 1596 \n",
- "1 [roarmap, opendoar, celestial] [283, 1717, 2101] \n",
+ "1 [roarmap, celestial, opendoar] [1717, 2101, 283] \n",
"2 roarmap NaN \n",
- "3 [opendoar, celestial] [1779, 1627] \n",
+ "3 [celestial, opendoar] [1627, 1779] \n",
"4 opendoar http://v2.sherpa.ac.uk/id/repository/4422 \n",
"\n",
" roar_submit_to roar_submitted_to_name roar_submitted_to_done \\\n",
@@ -5809,7 +5887,7 @@
"4 NaN NaN roar_15142 "
]
},
- "execution_count": 24,
+ "execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
@@ -5824,7 +5902,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
@@ -5943,13 +6021,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" roar_eprintid | \n",
" roar_rev_number | \n",
@@ -6127,13 +6208,16 @@
" [\"es\"] | \n",
" 2019-10-17 14:34:31 | \n",
" 2010-12-01 11:11:57 | \n",
- " [business and economics, education] | \n",
+ " [\"business and economics\", \"education\"] | \n",
" [journal_articles, conference_and_workshop_pap... | \n",
" [{'name': 'escuela de hotelería y turismo de c... | \n",
" [] | \n",
" {\"name\": \"dspace\", \"version\": \"1.6.2\"} | \n",
" NaN | \n",
" yes | \n",
+ " trial | \n",
+ " NaN | \n",
+ " 286.0 | \n",
" OpenDOAR_1996 | \n",
" NaN | \n",
" NaN | \n",
@@ -6317,6 +6401,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 2312 | \n",
" 736 | \n",
" archive | \n",
@@ -6379,8 +6466,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
- " [1832, 1149] | \n",
+ " [celestial, opendoar] | \n",
+ " [1149, 1832] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -6491,13 +6578,16 @@
" [\"ja\", \"en\"] | \n",
" 2020-09-09 11:57:56 | \n",
" 2007-10-09 09:09:40 | \n",
- " [technology general] | \n",
+ " [\"technology general\"] | \n",
" [journal_articles, unpub_reports_and_working_p... | \n",
" [{'name': 'kitami institute of technology', 'a... | \n",
" [] | \n",
" {\"name\": \"weko\", \"version\": \"\"} | \n",
" http://kitami-it.repo.nii.ac.jp/oai | \n",
" yes | \n",
+ " fully_functional | \n",
+ " 1534.0 | \n",
+ " 8681.0 | \n",
" OpenDOAR_1035 | \n",
" NaN | \n",
" NaN | \n",
@@ -6681,6 +6771,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 5779 | \n",
" 9 | \n",
" archive | \n",
@@ -6743,8 +6836,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
- " [2545, 5072] | \n",
+ " [celestial, opendoar] | \n",
+ " [5072, 2545] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -6863,6 +6956,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" 11212 | \n",
" 12 | \n",
" archive | \n",
@@ -6912,7 +7008,7 @@
" opus | \n",
" geoname_2_DE | \n",
" other | \n",
- " [HB, GE, T1] | \n",
+ " [T1, HB, GE] | \n",
" 2016-04-28 13:58:38 | \n",
" NaN | \n",
" please delete ID 5891 | \n",
@@ -6925,7 +7021,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
+ " [celestial, opendoar] | \n",
" [2539, 6112] | \n",
" NaN | \n",
" NaN | \n",
@@ -7316,12 +7412,12 @@
"3 NaN \n",
"4 NaN \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 [business and economics, education] \n",
- "1 NaN \n",
- "2 [technology general] \n",
- "3 NaN \n",
- "4 NaN \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 [\"business and economics\", \"education\"] \n",
+ "1 NaN \n",
+ "2 [\"technology general\"] \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [journal_articles, conference_and_workshop_pap... \n",
@@ -7351,26 +7447,47 @@
"3 NaN \n",
"4 NaN \n",
"\n",
- " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id roar_eprintid \\\n",
- "0 yes OpenDOAR_1996 NaN \n",
- "1 NaN NaN 2312 \n",
- "2 yes OpenDOAR_1035 NaN \n",
- "3 NaN NaN 5779 \n",
- "4 NaN NaN 11212 \n",
+ " OpenDOAR_system_metadata.publicly_visible \\\n",
+ "0 yes \n",
+ "1 NaN \n",
+ "2 yes \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
- " roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n",
- "0 NaN NaN NaN NaN NaN \n",
- "1 736 archive 1 NaN NaN \n",
- "2 NaN NaN NaN NaN NaN \n",
- "3 9 archive 8 NaN NaN \n",
- "4 12 archive 5611 NaN NaN \n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 trial \n",
+ "1 NaN \n",
+ "2 fully_functional \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
- " roar_dir roar_datestamp roar_lastmod \\\n",
- "0 NaN NaN NaN \n",
- "1 disk0/00/00/23/12 2010-01-14 12:10:06 2011-07-18 06:01:08 \n",
- "2 NaN NaN NaN \n",
- "3 disk0/00/00/57/79 2012-12-12 04:54:20 2012-12-15 02:36:20 \n",
- "4 disk0/00/01/12/12 2016-05-04 11:37:14 2016-05-07 01:37:18 \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 1534.0 \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n",
+ "0 286.0 OpenDOAR_1996 \n",
+ "1 NaN NaN \n",
+ "2 8681.0 OpenDOAR_1035 \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 2312 736 archive 1 NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "3 5779 9 archive 8 NaN \n",
+ "4 11212 12 archive 5611 NaN \n",
+ "\n",
+ " roar_source roar_dir roar_datestamp roar_lastmod \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN disk0/00/00/23/12 2010-01-14 12:10:06 2011-07-18 06:01:08 \n",
+ "2 NaN NaN NaN NaN \n",
+ "3 NaN disk0/00/00/57/79 2012-12-12 04:54:20 2012-12-15 02:36:20 \n",
+ "4 NaN disk0/00/01/12/12 2016-05-04 11:37:14 2016-05-07 01:37:18 \n",
"\n",
" roar_status_changed roar_type roar_succeeds roar_commentary \\\n",
"0 NaN NaN NaN NaN \n",
@@ -7489,7 +7606,7 @@
"1 other NaN 2005-06-07 12:57:08 NaN \n",
"2 NaN NaN NaN NaN \n",
"3 other NaN 2012-08-05 15:12:12 NaN \n",
- "4 other [HB, GE, T1] 2016-04-28 13:58:38 NaN \n",
+ "4 other [T1, HB, GE] 2016-04-28 13:58:38 NaN \n",
"\n",
" roar_suggestions roar_activity_low roar_activity_medium \\\n",
"0 NaN NaN NaN \n",
@@ -7521,10 +7638,10 @@
"\n",
" roar_registry_name roar_registry_id roar_submit_to \\\n",
"0 NaN NaN NaN \n",
- "1 [opendoar, celestial] [1832, 1149] NaN \n",
+ "1 [celestial, opendoar] [1149, 1832] NaN \n",
"2 NaN NaN NaN \n",
- "3 [opendoar, celestial] [2545, 5072] NaN \n",
- "4 [opendoar, celestial] [2539, 6112] NaN \n",
+ "3 [celestial, opendoar] [5072, 2545] NaN \n",
+ "4 [celestial, opendoar] [2539, 6112] NaN \n",
"\n",
" roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n",
"0 NaN NaN NaN \n",
@@ -7555,7 +7672,7 @@
"4 NaN NaN roar_11212 "
]
},
- "execution_count": 25,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
@@ -7570,7 +7687,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -7689,13 +7806,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" roar_eprintid | \n",
" roar_rev_number | \n",
@@ -7873,13 +7993,16 @@
" [\"zh\", \"nl\", \"en\", \"fr\", \"de\", \"it\", \"ja\", \"pt... | \n",
" 2021-09-13 13:35:44 | \n",
" 2007-10-10 16:16:02 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [journal_articles, conference_and_workshop_pap... | \n",
" [{'name': 'university of oxford', 'alternative... | \n",
" [{\"policy_url\": \"https://libguides.bodleian.ox... | \n",
" {\"name\": \"fedora\", \"version\": \"4.6.2\"} | \n",
" https://ora.ox.ac.uk/oai2 | \n",
" yes | \n",
+ " fully_functional | \n",
+ " 20.0 | \n",
+ " 239671.0 | \n",
" OpenDOAR_1064 | \n",
" NaN | \n",
" NaN | \n",
@@ -8055,13 +8178,16 @@
" [\"en\"] | \n",
" 2021-02-18 18:13:34 | \n",
" 2019-09-28 04:24:47 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [journal_articles, conference_and_workshop_pap... | \n",
" [{'name': 'georgia southern university', 'alte... | \n",
" [] | \n",
" {\"name\": \"digital_commons\", \"version\": \"\"} | \n",
" https://digitalcommons.georgiasouthern.edu/do/oai | \n",
" yes | \n",
+ " fully_functional | \n",
+ " 26851.0 | \n",
+ " 78076.0 | \n",
" OpenDOAR_8648 | \n",
" NaN | \n",
" NaN | \n",
@@ -8237,13 +8363,16 @@
" [\"en\", \"ja\"] | \n",
" 2021-05-21 18:04:32 | \n",
" 2020-07-13 10:09:55 | \n",
- " [science general] | \n",
+ " [\"science general\"] | \n",
" [journal_articles, conference_and_workshop_pap... | \n",
" [{'name': 'national institute for materials sc... | \n",
" [] | \n",
" {\"name\": \"fedora\", \"version\": \"\"} | \n",
" https://mdr.nims.go.jp/catalog/oai | \n",
" yes | \n",
+ " fully_functional | \n",
+ " NaN | \n",
+ " NaN | \n",
" OpenDOAR_9713 | \n",
" NaN | \n",
" NaN | \n",
@@ -8419,13 +8548,16 @@
" [\"en\"] | \n",
" 2021-09-13 13:35:39 | \n",
" 2006-08-04 09:09:20 | \n",
- " [multidisciplinary] | \n",
+ " [\"multidisciplinary\"] | \n",
" [journal_articles, theses_and_dissertations, u... | \n",
" [{'name': 'university of maryland', 'alternati... | \n",
" [{\"policy_url\": \"http://drum.lib.umd.edu/page/... | \n",
" {\"name\": \"dspace\", \"version\": \"4.1.0\"} | \n",
" http://drum.lib.umd.edu/oai/request | \n",
" yes | \n",
+ " fully_functional | \n",
+ " NaN | \n",
+ " 20513.0 | \n",
" OpenDOAR_427 | \n",
" NaN | \n",
" NaN | \n",
@@ -8564,9 +8696,9 @@
" NaN | \n",
" NaN | \n",
" [\"eng\"] | \n",
- " [1 Humanities and Social Sciences, 11 Humaniti... | \n",
+ " [{'name': '1 Humanities and Social Sciences', ... | \n",
" NaN | \n",
- " [Audiovisual data, Images, Standard office doc... | \n",
+ " [{'name': 'Audiovisual data', 'scheme': 'parse... | \n",
" [dataProvider] | \n",
" [multidisciplinary] | \n",
" [{'institutionName': 'University of Calgary, L... | \n",
@@ -8579,7 +8711,7 @@
" [{\"dataUploadLicenseName\": \"Submission Policy\"... | \n",
" [\"DSpace\"] | \n",
" NaN | \n",
- " [] | \n",
+ " {} | \n",
" [\"DOI\", \"hdl\"] | \n",
" NaN | \n",
" [] | \n",
@@ -8685,6 +8817,9 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" \n",
" \n",
"\n",
@@ -8962,7 +9097,7 @@
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
- "4 [1 Humanities and Social Sciences, 11 Humaniti... \n",
+ "4 [{'name': '1 Humanities and Social Sciences', ... \n",
"\n",
" re3data_missionStatementURL \\\n",
"0 NaN \n",
@@ -8976,7 +9111,7 @@
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
- "4 [Audiovisual data, Images, Standard office doc... [dataProvider] \n",
+ "4 [{'name': 'Audiovisual data', 'scheme': 'parse... [dataProvider] \n",
"\n",
" re3data_keyword re3data_institution \\\n",
"0 NaN NaN \n",
@@ -9025,7 +9160,7 @@
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
- "4 NaN [] [\"DOI\", \"hdl\"] \n",
+ "4 NaN {} [\"DOI\", \"hdl\"] \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 NaN NaN NaN \n",
@@ -9118,12 +9253,12 @@
"3 2006-08-04 09:09:20 \n",
"4 NaN \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 [multidisciplinary] \n",
- "1 [multidisciplinary] \n",
- "2 [science general] \n",
- "3 [multidisciplinary] \n",
- "4 NaN \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 [\"multidisciplinary\"] \n",
+ "1 [\"multidisciplinary\"] \n",
+ "2 [\"science general\"] \n",
+ "3 [\"multidisciplinary\"] \n",
+ "4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [journal_articles, conference_and_workshop_pap... \n",
@@ -9160,47 +9295,68 @@
"3 http://drum.lib.umd.edu/oai/request \n",
"4 NaN \n",
"\n",
- " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id roar_eprintid \\\n",
- "0 yes OpenDOAR_1064 NaN \n",
- "1 yes OpenDOAR_8648 NaN \n",
- "2 yes OpenDOAR_9713 NaN \n",
- "3 yes OpenDOAR_427 NaN \n",
- "4 NaN NaN NaN \n",
+ " OpenDOAR_system_metadata.publicly_visible \\\n",
+ "0 yes \n",
+ "1 yes \n",
+ "2 yes \n",
+ "3 yes \n",
+ "4 NaN \n",
"\n",
- " roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n",
- "0 NaN NaN NaN NaN NaN \n",
- "1 NaN NaN NaN NaN NaN \n",
- "2 NaN NaN NaN NaN NaN \n",
- "3 NaN NaN NaN NaN NaN \n",
- "4 NaN NaN NaN NaN NaN \n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 fully_functional \n",
+ "1 fully_functional \n",
+ "2 fully_functional \n",
+ "3 fully_functional \n",
+ "4 NaN \n",
"\n",
- " roar_dir roar_datestamp roar_lastmod roar_status_changed roar_type \\\n",
- "0 NaN NaN NaN NaN NaN \n",
- "1 NaN NaN NaN NaN NaN \n",
- "2 NaN NaN NaN NaN NaN \n",
- "3 NaN NaN NaN NaN NaN \n",
- "4 NaN NaN NaN NaN NaN \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 20.0 \n",
+ "1 26851.0 \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
- " roar_succeeds roar_commentary roar_metadata_visibility roar_latitude \\\n",
- "0 NaN NaN NaN NaN \n",
- "1 NaN NaN NaN NaN \n",
- "2 NaN NaN NaN NaN \n",
- "3 NaN NaN NaN NaN \n",
- "4 NaN NaN NaN NaN \n",
+ " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n",
+ "0 239671.0 OpenDOAR_1064 \n",
+ "1 78076.0 OpenDOAR_8648 \n",
+ "2 NaN OpenDOAR_9713 \n",
+ "3 20513.0 OpenDOAR_427 \n",
+ "4 NaN NaN \n",
"\n",
- " roar_longitude roar_relation_type roar_relation_uri roar_item_issues_id \\\n",
- "0 NaN NaN NaN NaN \n",
- "1 NaN NaN NaN NaN \n",
- "2 NaN NaN NaN NaN \n",
- "3 NaN NaN NaN NaN \n",
- "4 NaN NaN NaN NaN \n",
+ " roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN NaN \n",
"\n",
- " roar_item_issues_type roar_item_issues_description \\\n",
- "0 NaN NaN \n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
+ " roar_source roar_dir roar_datestamp roar_lastmod roar_status_changed \\\n",
+ "0 NaN NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN NaN \n",
+ "\n",
+ " roar_type roar_succeeds roar_commentary roar_metadata_visibility \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN \n",
+ "\n",
+ " roar_latitude roar_longitude roar_relation_type roar_relation_uri \\\n",
+ "0 NaN NaN NaN NaN \n",
+ "1 NaN NaN NaN NaN \n",
+ "2 NaN NaN NaN NaN \n",
+ "3 NaN NaN NaN NaN \n",
+ "4 NaN NaN NaN NaN \n",
+ "\n",
+ " roar_item_issues_id roar_item_issues_type roar_item_issues_description \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
"\n",
" roar_item_issues_timestamp roar_item_issues_status \\\n",
"0 NaN NaN \n",
@@ -9322,7 +9478,7 @@
"4 NaN NaN "
]
},
- "execution_count": 26,
+ "execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
@@ -9337,18 +9493,18 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- ":1: PerformanceWarning:\n",
+ ":1: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n",
- ":2: PerformanceWarning:\n",
+ ":2: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n"
@@ -9470,13 +9626,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" roar_eprintid | \n",
" roar_rev_number | \n",
@@ -9663,6 +9822,9 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
" [4612, 4649] | \n",
" [28, 8] | \n",
" [archive, archive] | \n",
@@ -9712,7 +9874,7 @@
" [dspace, dspace] | \n",
" [geoname_2_IN, geoname_2_IN] | \n",
" [other, other] | \n",
- " [[TP, TN, TJ, TH, TK, TD, TA], [TA, T1]] | \n",
+ " [[TD, TP, TH, TJ, TK, TN, TA], [T1, TA]] | \n",
" [2011-12-15 09:01:35, 2012-01-05 12:09:37] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
@@ -9846,6 +10008,9 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
" [7943, 8003] | \n",
" [16, 19] | \n",
" [archive, archive] | \n",
@@ -9895,7 +10060,7 @@
" [eprints, eprints] | \n",
" [geoname_2_IN, geoname_2_IN] | \n",
" [3.3.15 eps, 3.3.15 eps] | \n",
- " [[RB, RM], [R1, RZ]] | \n",
+ " [[RB, RM], [RZ, R1]] | \n",
" [2014-03-07 15:07:45, 2014-03-19 07:05:04] | \n",
" [The National Institute for Research in Tuberc... | \n",
" [nan, Please include \"Tuberculosis\" as a Speci... | \n",
@@ -9908,7 +10073,7 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " [[opendoar, celestial], celestial] | \n",
+ " [[celestial, opendoar], celestial] | \n",
" [[5410, 2725], 5430] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
@@ -10029,6 +10194,9 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
+ " [nan, nan, nan] | \n",
+ " [nan, nan, nan] | \n",
+ " [nan, nan, nan] | \n",
" [2670, 2698, 2741] | \n",
" [470, 317, 231] | \n",
" [archive, archive, archive] | \n",
@@ -10091,7 +10259,7 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
- " [[opendoar, celestial], opendoar, opendoar] | \n",
+ " [[celestial, opendoar], opendoar, opendoar] | \n",
" [[2426, 1781], 1781, 1807] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -10212,6 +10380,9 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
" [4393, 4394] | \n",
" [14, 14] | \n",
" [archive, archive] | \n",
@@ -10395,6 +10566,9 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
" [1019, 5550] | \n",
" [526, 9] | \n",
" [archive, archive] | \n",
@@ -10457,8 +10631,8 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " [[roarmap, opendoar, celestial], [opendoar, ce... | \n",
- " [[193, 1456, 1441], [1456, 1441]] | \n",
+ " [[roarmap, celestial, opendoar], [celestial, o... | \n",
+ " [[1441, 193, 1456], [1441, 1456]] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
@@ -10842,12 +11016,12 @@
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 [nan, nan] \n",
- "1 [nan, nan] \n",
- "2 [nan, nan, nan] \n",
- "3 [nan, nan] \n",
- "4 [nan, nan] \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 [nan, nan] \n",
+ "1 [nan, nan] \n",
+ "2 [nan, nan, nan] \n",
+ "3 [nan, nan] \n",
+ "4 [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.content_types OpenDOAR_organization \\\n",
"0 [nan, nan] [nan, nan] \n",
@@ -10870,12 +11044,33 @@
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
- " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id \\\n",
- "0 [nan, nan] [nan, nan] \n",
- "1 [nan, nan] [nan, nan] \n",
- "2 [nan, nan, nan] [nan, nan, nan] \n",
- "3 [nan, nan] [nan, nan] \n",
- "4 [nan, nan] [nan, nan] \n",
+ " OpenDOAR_system_metadata.publicly_visible \\\n",
+ "0 [nan, nan] \n",
+ "1 [nan, nan] \n",
+ "2 [nan, nan, nan] \n",
+ "3 [nan, nan] \n",
+ "4 [nan, nan] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 [nan, nan] \n",
+ "1 [nan, nan] \n",
+ "2 [nan, nan, nan] \n",
+ "3 [nan, nan] \n",
+ "4 [nan, nan] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 [nan, nan] \n",
+ "1 [nan, nan] \n",
+ "2 [nan, nan, nan] \n",
+ "3 [nan, nan] \n",
+ "4 [nan, nan] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n",
+ "0 [nan, nan] [nan, nan] \n",
+ "1 [nan, nan] [nan, nan] \n",
+ "2 [nan, nan, nan] [nan, nan, nan] \n",
+ "3 [nan, nan] [nan, nan] \n",
+ "4 [nan, nan] [nan, nan] \n",
"\n",
" roar_eprintid roar_rev_number roar_eprint_status \\\n",
"0 [4612, 4649] [28, 8] [archive, archive] \n",
@@ -11060,8 +11255,8 @@
"4 [geoname_2_HK, geoname_2_CN] [other, other] \n",
"\n",
" roar_subjects \\\n",
- "0 [[TP, TN, TJ, TH, TK, TD, TA], [TA, T1]] \n",
- "1 [[RB, RM], [R1, RZ]] \n",
+ "0 [[TD, TP, TH, TJ, TK, TN, TA], [T1, TA]] \n",
+ "1 [[RB, RM], [RZ, R1]] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
@@ -11110,17 +11305,17 @@
"\n",
" roar_registry_name \\\n",
"0 [celestial, celestial] \n",
- "1 [[opendoar, celestial], celestial] \n",
- "2 [[opendoar, celestial], opendoar, opendoar] \n",
+ "1 [[celestial, opendoar], celestial] \n",
+ "2 [[celestial, opendoar], opendoar, opendoar] \n",
"3 [celestial, celestial] \n",
- "4 [[roarmap, opendoar, celestial], [opendoar, ce... \n",
+ "4 [[roarmap, celestial, opendoar], [celestial, o... \n",
"\n",
" roar_registry_id roar_submit_to roar_submitted_to_name \\\n",
"0 [4790, 4789] [nan, nan] [nan, nan] \n",
"1 [[5410, 2725], 5430] [nan, nan] [nan, nan] \n",
"2 [[2426, 1781], 1781, 1807] [nan, nan, nan] [nan, nan, nan] \n",
"3 [4715, 4715] [nan, nan] [nan, nan] \n",
- "4 [[193, 1456, 1441], [1456, 1441]] [nan, nan] [nan, nan] \n",
+ "4 [[1441, 193, 1456], [1441, 1456]] [nan, nan] [nan, nan] \n",
"\n",
" roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
@@ -11151,7 +11346,7 @@
"4 [nan, nan] [roar_1019, roar_5550] {roar} "
]
},
- "execution_count": 27,
+ "execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
@@ -11164,18 +11359,18 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- ":1: PerformanceWarning:\n",
+ ":1: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n",
- ":2: PerformanceWarning:\n",
+ ":2: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n"
@@ -11297,13 +11492,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" roar_eprintid | \n",
" roar_rev_number | \n",
@@ -11482,13 +11680,16 @@
" [nan, [\"es\"], [\"es\"]] | \n",
" [nan, 2021-09-13 13:35:56, 2021-09-13 13:36:17] | \n",
" [nan, 2012-02-28 12:12:09, 2019-02-19 10:51:49] | \n",
- " [nan, [multidisciplinary], [business and econo... | \n",
+ " [nan, [\"multidisciplinary\"], [\"business and ec... | \n",
" [nan, [journal_articles, theses_and_dissertati... | \n",
" [nan, [{'name': 'universidad nacional autónoma... | \n",
" [nan, [{\"policy_url\": \"http://ru.iiec.unam.mx/... | \n",
" [nan, {\"name\": \"eprints\", \"version\": \"3.3.15\"}... | \n",
" [nan, http://ru.iiec.unam.mx/cgi/oai2, nan] | \n",
" [nan, yes, yes] | \n",
+ " [nan, fully_functional, fully_functional] | \n",
+ " [nan, nan, nan] | \n",
+ " [nan, 3066.0, nan] | \n",
" [nan, OpenDOAR_2429, OpenDOAR_4320] | \n",
" [4745, nan, nan] | \n",
" [31, nan, nan] | \n",
@@ -11539,7 +11740,7 @@
" [eprints, nan, nan] | \n",
" [geoname_2_MX, nan, nan] | \n",
" [3.3.15 eps, nan, nan] | \n",
- " [[GF, HJ, HT, HB, HM, HC, HX, HN, H1, G1, T1, ... | \n",
+ " [[HA, HG, GF, HB, HC, JA, HX, HF, T1, HJ, H1, ... | \n",
" [2012-02-03 05:18:16, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -11552,7 +11753,7 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
- " [[opendoar, celestial], nan, nan] | \n",
+ " [[celestial, opendoar], nan, nan] | \n",
" [[2429, 4818], nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -11665,13 +11866,16 @@
" [[\"en\"], [\"en\"], nan] | \n",
" [2021-09-13 13:36:06, 2021-02-18 18:01:12, nan] | \n",
" [2014-06-16 13:36:00, 2019-03-26 14:07:30, nan] | \n",
- " [[multidisciplinary], [multidisciplinary], nan] | \n",
+ " [[\"multidisciplinary\"], [\"multidisciplinary\"],... | \n",
" [[journal_articles], [journal_articles, biblio... | \n",
" [[{'name': 'landmark university', 'alternative... | \n",
" [[{\"policy_url\": \"http://eprints.lmu.edu.ng/po... | \n",
" [{\"name\": \"eprints\", \"version\": \"3.3.12\"}, {\"n... | \n",
" [http://eprints.lmu.edu.ng/cgi/oai2, nan, nan] | \n",
" [yes, yes, nan] | \n",
+ " [fully_functional, fully_functional, nan] | \n",
+ " [nan, nan, nan] | \n",
+ " [507.0, nan, nan] | \n",
" [OpenDOAR_3087, OpenDOAR_4500, nan] | \n",
" [nan, nan, 8504] | \n",
" [nan, nan, 12] | \n",
@@ -11735,8 +11939,8 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
- " [nan, nan, [opendoar, celestial]] | \n",
- " [nan, nan, [5621, 3087]] | \n",
+ " [nan, nan, [celestial, opendoar]] | \n",
+ " [nan, nan, [3087, 5621]] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -11848,13 +12052,16 @@
" [[\"pl\"], nan, nan] | \n",
" [2019-10-17 14:34:36, nan, nan] | \n",
" [2011-10-11 13:13:58, nan, nan] | \n",
- " [[multidisciplinary], nan, nan] | \n",
+ " [[\"multidisciplinary\"], nan, nan] | \n",
" [[journal_articles], nan, nan] | \n",
" [[{'name': 'iława', 'alternativeName': '', 'co... | \n",
" [[], nan, nan] | \n",
" [{\"name\": \"dlibra\", \"version\": \"4\"}, nan, nan] | \n",
" [http://ibc.ilawa.pl/dlibra/oai-pmh-repository... | \n",
" [yes, nan, nan] | \n",
+ " [fully_functional, nan, nan] | \n",
+ " [0.0, nan, nan] | \n",
+ " [3397.0, nan, nan] | \n",
" [OpenDOAR_2318, nan, nan] | \n",
" [nan, 5503, 4271] | \n",
" [nan, 9, 11] | \n",
@@ -11918,8 +12125,8 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
- " [nan, [opendoar, celestial], [opendoar, celest... | \n",
- " [nan, [2318, 4672], [2318, 4672]] | \n",
+ " [nan, [celestial, opendoar], [celestial, opend... | \n",
+ " [nan, [4672, 2318], [4672, 2318]] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -12031,13 +12238,16 @@
" [nan, nan, [\"pt\"]] | \n",
" [nan, nan, 2019-10-17 14:34:23] | \n",
" [nan, nan, 2009-05-01 10:10:47] | \n",
- " [nan, nan, [education]] | \n",
+ " [nan, nan, [\"education\"]] | \n",
" [nan, nan, [theses_and_dissertations, unpub_re... | \n",
" [nan, nan, [{'name': 'ação educativa', 'altern... | \n",
" [nan, nan, []] | \n",
" [nan, nan, {\"name\": \"dspace\", \"version\": \"\"}] | \n",
" [nan, nan, http://www.bdae.org.br/dspace-oai/r... | \n",
" [nan, nan, yes] | \n",
+ " [nan, nan, fully_functional] | \n",
+ " [nan, nan, 0.0] | \n",
+ " [nan, nan, 2157.0] | \n",
" [nan, nan, OpenDOAR_1509] | \n",
" [5711, 126, nan] | \n",
" [9, 503, nan] | \n",
@@ -12101,8 +12311,8 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
- " [[opendoar, celestial], [opendoar, celestial],... | \n",
- " [[1430, 1509], [1430, 1509], nan] | \n",
+ " [[celestial, opendoar], [celestial, opendoar],... | \n",
+ " [[1509, 1430], [1509, 1430], nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -12214,13 +12424,16 @@
" [nan, nan, [\"zh\", \"en\"]] | \n",
" [nan, nan, 2019-10-17 14:34:36] | \n",
" [nan, nan, 2011-10-10 13:13:11] | \n",
- " [nan, nan, [technology general, mechanical eng... | \n",
+ " [nan, nan, [\"technology general\", \"mechanical ... | \n",
" [nan, nan, [journal_articles, bibliographic_re... | \n",
" [nan, nan, [{'name': 'chinese academy of scien... | \n",
" [nan, nan, []] | \n",
" [nan, nan, {\"name\": \"dspace\", \"version\": \"\"}] | \n",
" [nan, nan, http://ir.nimte.ac.cn/casirgrid-oai... | \n",
" [nan, nan, yes] | \n",
+ " [nan, nan, fully_functional] | \n",
+ " [nan, nan, nan] | \n",
+ " [nan, nan, 4443.0] | \n",
" [nan, nan, OpenDOAR_2306] | \n",
" [4379, 4266, nan] | \n",
" [15, 11, nan] | \n",
@@ -12284,7 +12497,7 @@
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
- " [celestial, [opendoar, celestial], nan] | \n",
+ " [celestial, [celestial, opendoar], nan] | \n",
" [4668, [4668, 2306], nan] | \n",
" [nan, nan, nan] | \n",
" [nan, nan, nan] | \n",
@@ -12683,12 +12896,12 @@
"3 [nan, nan, 2009-05-01 10:10:47] \n",
"4 [nan, nan, 2011-10-10 13:13:11] \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 [nan, [multidisciplinary], [business and econo... \n",
- "1 [[multidisciplinary], [multidisciplinary], nan] \n",
- "2 [[multidisciplinary], nan, nan] \n",
- "3 [nan, nan, [education]] \n",
- "4 [nan, nan, [technology general, mechanical eng... \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 [nan, [\"multidisciplinary\"], [\"business and ec... \n",
+ "1 [[\"multidisciplinary\"], [\"multidisciplinary\"],... \n",
+ "2 [[\"multidisciplinary\"], nan, nan] \n",
+ "3 [nan, nan, [\"education\"]] \n",
+ "4 [nan, nan, [\"technology general\", \"mechanical ... \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [nan, [journal_articles, theses_and_dissertati... \n",
@@ -12732,6 +12945,27 @@
"3 [nan, nan, yes] \n",
"4 [nan, nan, yes] \n",
"\n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 [nan, fully_functional, fully_functional] \n",
+ "1 [fully_functional, fully_functional, nan] \n",
+ "2 [fully_functional, nan, nan] \n",
+ "3 [nan, nan, fully_functional] \n",
+ "4 [nan, nan, fully_functional] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 [nan, nan, nan] \n",
+ "1 [nan, nan, nan] \n",
+ "2 [0.0, nan, nan] \n",
+ "3 [nan, nan, 0.0] \n",
+ "4 [nan, nan, nan] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.metadata_record_count \\\n",
+ "0 [nan, 3066.0, nan] \n",
+ "1 [507.0, nan, nan] \n",
+ "2 [3397.0, nan, nan] \n",
+ "3 [nan, nan, 2157.0] \n",
+ "4 [nan, nan, 4443.0] \n",
+ "\n",
" OpenDOAR_unique_id roar_eprintid roar_rev_number \\\n",
"0 [nan, OpenDOAR_2429, OpenDOAR_4320] [4745, nan, nan] [31, nan, nan] \n",
"1 [OpenDOAR_3087, OpenDOAR_4500, nan] [nan, nan, 8504] [nan, nan, 12] \n",
@@ -12908,7 +13142,7 @@
"4 [geoname_2_CN, geoname_2_CN, nan] [other, other, nan] \n",
"\n",
" roar_subjects \\\n",
- "0 [[GF, HJ, HT, HB, HM, HC, HX, HN, H1, G1, T1, ... \n",
+ "0 [[HA, HG, GF, HB, HC, JA, HX, HF, T1, HJ, H1, ... \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
@@ -12943,17 +13177,17 @@
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_fulltexts_rdocs roar_registry_name \\\n",
- "0 [nan, nan, nan] [[opendoar, celestial], nan, nan] \n",
- "1 [nan, nan, nan] [nan, nan, [opendoar, celestial]] \n",
- "2 [nan, nan, nan] [nan, [opendoar, celestial], [opendoar, celest... \n",
- "3 [nan, nan, nan] [[opendoar, celestial], [opendoar, celestial],... \n",
- "4 [nan, nan, nan] [celestial, [opendoar, celestial], nan] \n",
+ "0 [nan, nan, nan] [[celestial, opendoar], nan, nan] \n",
+ "1 [nan, nan, nan] [nan, nan, [celestial, opendoar]] \n",
+ "2 [nan, nan, nan] [nan, [celestial, opendoar], [celestial, opend... \n",
+ "3 [nan, nan, nan] [[celestial, opendoar], [celestial, opendoar],... \n",
+ "4 [nan, nan, nan] [celestial, [celestial, opendoar], nan] \n",
"\n",
" roar_registry_id roar_submit_to roar_submitted_to_name \\\n",
"0 [[2429, 4818], nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
- "1 [nan, nan, [5621, 3087]] [nan, nan, nan] [nan, nan, nan] \n",
- "2 [nan, [2318, 4672], [2318, 4672]] [nan, nan, nan] [nan, nan, nan] \n",
- "3 [[1430, 1509], [1430, 1509], nan] [nan, nan, nan] [nan, nan, nan] \n",
+ "1 [nan, nan, [3087, 5621]] [nan, nan, nan] [nan, nan, nan] \n",
+ "2 [nan, [4672, 2318], [4672, 2318]] [nan, nan, nan] [nan, nan, nan] \n",
+ "3 [[1509, 1430], [1509, 1430], nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [4668, [4668, 2306], nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n",
@@ -12985,7 +13219,7 @@
"4 [nan, nan, nan] [roar_4379, roar_4266, nan] {roar, OpenDOAR} "
]
},
- "execution_count": 28,
+ "execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
@@ -12998,18 +13232,18 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- ":1: PerformanceWarning:\n",
+ ":1: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n",
- ":2: PerformanceWarning:\n",
+ ":2: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n"
@@ -13131,13 +13365,16 @@
" OpenDOAR_repository_metadata.content_languages | \n",
" OpenDOAR_system_metadata.date_modified | \n",
" OpenDOAR_system_metadata.date_created | \n",
- " OpenDOAR_repository_metadata.content_subjects_phrases | \n",
+ " OpenDOAR_repository_metadata.content_subjects | \n",
" OpenDOAR_repository_metadata.content_types | \n",
" OpenDOAR_organization | \n",
" OpenDOAR_policy_urls | \n",
" OpenDOAR_repository_metadata.software | \n",
" OpenDOAR_repository_metadata.oai_url | \n",
" OpenDOAR_system_metadata.publicly_visible | \n",
+ " OpenDOAR_repository_metadata.repository_status | \n",
+ " OpenDOAR_repository_metadata.fulltext_record_count | \n",
+ " OpenDOAR_repository_metadata.metadata_record_count | \n",
" OpenDOAR_unique_id | \n",
" roar_eprintid | \n",
" roar_rev_number | \n",
@@ -13279,9 +13516,9 @@
" [2014, nan] | \n",
" [nan, nan] | \n",
" [[\"eng\"], nan] | \n",
- " [[2 Life Sciences, 201 Basic Biological and Me... | \n",
+ " [[{'name': '2 Life Sciences', 'scheme': 'DFG'}... | \n",
" [nan, nan] | \n",
- " [[Images, Structured text], nan] | \n",
+ " [[{'name': 'Images', 'scheme': 'parse'}, {'nam... | \n",
" [[dataProvider], nan] | \n",
" [[genomes, life sciences, proteins, proteomes,... | \n",
" [[{'institutionName': 'Georgetown University, ... | \n",
@@ -13294,7 +13531,7 @@
" [[], nan] | \n",
" [[\"unknown\"], nan] | \n",
" [yes, nan] | \n",
- " [[\"ftp://ftp.pir.georgetown.edu/databases/\", \"... | \n",
+ " [{\"api\": \"ftp://ftp.pir.georgetown.edu/databas... | \n",
" [[\"none\"], nan] | \n",
" [nan, nan] | \n",
" [[], nan] | \n",
@@ -13400,7 +13637,10 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " {FAIRsharing, re3data} | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " {re3data, FAIRsharing} | \n",
" \n",
" \n",
" 1 | \n",
@@ -13499,13 +13739,16 @@
" [[\"uk\", \"en\"], nan] | \n",
" [2019-10-17 14:34:57, nan] | \n",
" [2015-07-08 12:43:38, nan] | \n",
- " [[multidisciplinary], nan] | \n",
+ " [[\"multidisciplinary\"], nan] | \n",
" [[journal_articles, conference_and_workshop_pa... | \n",
" [[{'name': 'ukrainian catholic university', 'a... | \n",
" [[], nan] | \n",
" [{\"name\": \"dspace\", \"version\": \"\"}, nan] | \n",
" [nan, nan] | \n",
" [yes, nan] | \n",
+ " [fully_functional, nan] | \n",
+ " [nan, nan] | \n",
+ " [840.0, nan] | \n",
" [OpenDOAR_3410, nan] | \n",
" [nan, 10013] | \n",
" [nan, 31] | \n",
@@ -13556,7 +13799,7 @@
" [nan, dspace] | \n",
" [nan, geoname_2_UA] | \n",
" [nan, other] | \n",
- " [nan, [H1, L1, AC, D204, B1, D1, DK, BF, BS, H... | \n",
+ " [nan, [D204, BS, BL, B1, D901, DK, H1, HM, L1,... | \n",
" [nan, 2015-07-07 12:38:37] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
@@ -13569,7 +13812,7 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " [nan, [opendoar, celestial]] | \n",
+ " [nan, [celestial, opendoar]] | \n",
" [nan, [3410, 5883]] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
@@ -13581,7 +13824,7 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " [nan, [russell_group, ivy_league]] | \n",
+ " [nan, [ivy_league, russell_group]] | \n",
" [nan, roar_10013] | \n",
" {roar, OpenDOAR} | \n",
"
\n",
@@ -13645,9 +13888,9 @@
" [nan, 2012-05-22] | \n",
" [nan, nan] | \n",
" [nan, [\"eng\"]] | \n",
- " [nan, [2 Life Sciences, 201 Basic Biological a... | \n",
+ " [nan, [{'name': '2 Life Sciences', 'scheme': '... | \n",
" [nan, https://sagebionetworks.org/tools_resour... | \n",
- " [nan, [Raw data, Scientific and statistical da... | \n",
+ " [nan, [{'name': 'Raw data', 'scheme': 'parse'}... | \n",
" [nan, [dataProvider, serviceProvider]] | \n",
" [nan, [AMP-AD Knowledge Portal, DREAM Challeng... | \n",
" [nan, [{'institutionName': 'Alfred P. Sloan Fo... | \n",
@@ -13660,7 +13903,7 @@
" [nan, []] | \n",
" [nan, [\"unknown\"]] | \n",
" [nan, yes] | \n",
- " [nan, [\"https://docs.synapse.org/rest/\", \"REST\"]] | \n",
+ " [nan, {\"api\": \"https://docs.synapse.org/rest/\"... | \n",
" [nan, [\"DOI\"]] | \n",
" [nan, nan] | \n",
" [nan, []] | \n",
@@ -13766,7 +14009,10 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " {FAIRsharing, re3data} | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
+ " {re3data, FAIRsharing} | \n",
" \n",
" \n",
" 3 | \n",
@@ -13865,13 +14111,16 @@
" [nan, [\"tr\"]] | \n",
" [nan, 2021-05-21 18:05:06] | \n",
" [nan, 2020-06-02 09:14:18] | \n",
- " [nan, [multidisciplinary]] | \n",
+ " [nan, [\"multidisciplinary\"]] | \n",
" [nan, [journal_articles]] | \n",
" [nan, [{'name': 'giresun university', 'alterna... | \n",
" [nan, []] | \n",
" [nan, {\"name\": \"dspace\", \"version\": \"6.2\"}] | \n",
" [nan, http://acikerisim.giresun.edu.tr/oai/req... | \n",
" [nan, yes] | \n",
+ " [nan, fully_functional] | \n",
+ " [nan, nan] | \n",
+ " [nan, nan] | \n",
" [nan, OpenDOAR_9647] | \n",
" [16034, nan] | \n",
" [7, nan] | \n",
@@ -14048,13 +14297,16 @@
" [nan, [\"eu\", \"fr\", \"es\", \"en\"]] | \n",
" [nan, 2019-10-17 14:34:21] | \n",
" [nan, 2009-02-02 13:13:26] | \n",
- " [nan, [multidisciplinary]] | \n",
+ " [nan, [\"multidisciplinary\"]] | \n",
" [nan, [journal_articles, books_chapters_and_se... | \n",
" [nan, [{'name': 'euskomedia', 'alternativeName... | \n",
" [nan, []] | \n",
" [nan, {\"name\": \"eprints\", \"version\": \"3.0.5\"}] | \n",
" [nan, http://hedatuz.euskomedia.org/cgi/oai2] | \n",
" [nan, yes] | \n",
+ " [nan, technically_malfunctioning] | \n",
+ " [nan, nan] | \n",
+ " [nan, 10570.0] | \n",
" [nan, OpenDOAR_1426] | \n",
" [610, nan] | \n",
" [514, nan] | \n",
@@ -14118,8 +14370,8 @@
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
- " [[opendoar, celestial], nan] | \n",
- " [[1294, 1426], nan] | \n",
+ " [[celestial, opendoar], nan] | \n",
+ " [[1426, 1294], nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
" [nan, nan] | \n",
@@ -14462,9 +14714,9 @@
"4 [nan, nan] \n",
"\n",
" re3data_subject \\\n",
- "0 [[2 Life Sciences, 201 Basic Biological and Me... \n",
+ "0 [[{'name': '2 Life Sciences', 'scheme': 'DFG'}... \n",
"1 [nan, nan] \n",
- "2 [nan, [2 Life Sciences, 201 Basic Biological a... \n",
+ "2 [nan, [{'name': '2 Life Sciences', 'scheme': '... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
@@ -14476,9 +14728,9 @@
"4 [nan, nan] \n",
"\n",
" re3data_contentType \\\n",
- "0 [[Images, Structured text], nan] \n",
+ "0 [[{'name': 'Images', 'scheme': 'parse'}, {'nam... \n",
"1 [nan, nan] \n",
- "2 [nan, [Raw data, Scientific and statistical da... \n",
+ "2 [nan, [{'name': 'Raw data', 'scheme': 'parse'}... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
@@ -14539,9 +14791,9 @@
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_api re3data_pidSystem \\\n",
- "0 [[\"ftp://ftp.pir.georgetown.edu/databases/\", \"... [[\"none\"], nan] \n",
+ "0 [{\"api\": \"ftp://ftp.pir.georgetown.edu/databas... [[\"none\"], nan] \n",
"1 [nan, nan] [nan, nan] \n",
- "2 [nan, [\"https://docs.synapse.org/rest/\", \"REST\"]] [nan, [\"DOI\"]] \n",
+ "2 [nan, {\"api\": \"https://docs.synapse.org/rest/\"... [nan, [\"DOI\"]] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
@@ -14636,12 +14888,12 @@
"3 [nan, 2020-06-02 09:14:18] \n",
"4 [nan, 2009-02-02 13:13:26] \n",
"\n",
- " OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
- "0 [nan, nan] \n",
- "1 [[multidisciplinary], nan] \n",
- "2 [nan, nan] \n",
- "3 [nan, [multidisciplinary]] \n",
- "4 [nan, [multidisciplinary]] \n",
+ " OpenDOAR_repository_metadata.content_subjects \\\n",
+ "0 [nan, nan] \n",
+ "1 [[\"multidisciplinary\"], nan] \n",
+ "2 [nan, nan] \n",
+ "3 [nan, [\"multidisciplinary\"]] \n",
+ "4 [nan, [\"multidisciplinary\"]] \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [nan, nan] \n",
@@ -14671,12 +14923,33 @@
"3 [nan, http://acikerisim.giresun.edu.tr/oai/req... \n",
"4 [nan, http://hedatuz.euskomedia.org/cgi/oai2] \n",
"\n",
- " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id \\\n",
- "0 [nan, nan] [nan, nan] \n",
- "1 [yes, nan] [OpenDOAR_3410, nan] \n",
- "2 [nan, nan] [nan, nan] \n",
- "3 [nan, yes] [nan, OpenDOAR_9647] \n",
- "4 [nan, yes] [nan, OpenDOAR_1426] \n",
+ " OpenDOAR_system_metadata.publicly_visible \\\n",
+ "0 [nan, nan] \n",
+ "1 [yes, nan] \n",
+ "2 [nan, nan] \n",
+ "3 [nan, yes] \n",
+ "4 [nan, yes] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.repository_status \\\n",
+ "0 [nan, nan] \n",
+ "1 [fully_functional, nan] \n",
+ "2 [nan, nan] \n",
+ "3 [nan, fully_functional] \n",
+ "4 [nan, technically_malfunctioning] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.fulltext_record_count \\\n",
+ "0 [nan, nan] \n",
+ "1 [nan, nan] \n",
+ "2 [nan, nan] \n",
+ "3 [nan, nan] \n",
+ "4 [nan, nan] \n",
+ "\n",
+ " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n",
+ "0 [nan, nan] [nan, nan] \n",
+ "1 [840.0, nan] [OpenDOAR_3410, nan] \n",
+ "2 [nan, nan] [nan, nan] \n",
+ "3 [nan, nan] [nan, OpenDOAR_9647] \n",
+ "4 [nan, 10570.0] [nan, OpenDOAR_1426] \n",
"\n",
" roar_eprintid roar_rev_number roar_eprint_status roar_userid \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
@@ -14820,7 +15093,7 @@
"\n",
" roar_subjects \\\n",
"0 [nan, nan] \n",
- "1 [nan, [H1, L1, AC, D204, B1, D1, DK, BF, BS, H... \n",
+ "1 [nan, [D204, BS, BL, B1, D901, DK, H1, HM, L1,... \n",
"2 [nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
@@ -14862,10 +15135,10 @@
"\n",
" roar_registry_name roar_registry_id \\\n",
"0 [nan, nan] [nan, nan] \n",
- "1 [nan, [opendoar, celestial]] [nan, [3410, 5883]] \n",
+ "1 [nan, [celestial, opendoar]] [nan, [3410, 5883]] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [roarmap, nan] [http://roarmap.eprints.org/1046/, nan] \n",
- "4 [[opendoar, celestial], nan] [[1294, 1426], nan] \n",
+ "4 [[celestial, opendoar], nan] [[1426, 1294], nan] \n",
"\n",
" roar_submit_to roar_submitted_to_name roar_submitted_to_done \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
@@ -14890,20 +15163,20 @@
"\n",
" roar_total_deposits roar_association roar_unique_id \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
- "1 [nan, nan] [nan, [russell_group, ivy_league]] [nan, roar_10013] \n",
+ "1 [nan, nan] [nan, [ivy_league, russell_group]] [nan, roar_10013] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [roar_16034, nan] \n",
"4 [nan, nan] [nan, nan] [roar_610, nan] \n",
"\n",
" source_set \n",
- "0 {FAIRsharing, re3data} \n",
+ "0 {re3data, FAIRsharing} \n",
"1 {roar, OpenDOAR} \n",
- "2 {FAIRsharing, re3data} \n",
+ "2 {re3data, FAIRsharing} \n",
"3 {roar, OpenDOAR} \n",
"4 {roar, OpenDOAR} "
]
},
- "execution_count": 29,
+ "execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
@@ -14916,7 +15189,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 34,
"metadata": {},
"outputs": [],
"source": [