From cc2c004b9ea02c8068ca7849cb36dcf47786dcde Mon Sep 17 00:00:00 2001 From: Andrea Mannocci Date: Fri, 8 Oct 2021 10:51:04 +0200 Subject: [PATCH] counting duplicates within --- notebooks/03-overlap.ipynb | 939 ++++++++++++++++++++++++------------- 1 file changed, 606 insertions(+), 333 deletions(-) diff --git a/notebooks/03-overlap.ipynb b/notebooks/03-overlap.ipynb index 9a81185..1fa3496 100644 --- a/notebooks/03-overlap.ipynb +++ b/notebooks/03-overlap.ipynb @@ -654,9 +654,9 @@ " NaN\n", " NaN\n", " [\"eng\"]\n", - " [1 Humanities and Social Sciences, 111 Social ...\n", + " [{'name': '1 Humanities and Social Sciences', ...\n", " NaN\n", - " [Databases, Plain text, Scientific and statist...\n", + " [{'name': 'Databases', 'scheme': 'parse'}, {'n...\n", " [dataProvider]\n", " [FAIR, Middle East, crime, demography, economy...\n", " [{'institutionName': 'Odum Institute for Resea...\n", @@ -669,7 +669,7 @@ " []\n", " [\"DataVerse\"]\n", " NaN\n", - " []\n", + " {}\n", " [\"DOI\"]\n", " NaN\n", " []\n", @@ -699,9 +699,9 @@ " 1985\n", " NaN\n", " [\"eng\", \"spa\"]\n", - " [1 Humanities and Social Sciences, 102 History...\n", + " [{'name': '1 Humanities and Social Sciences', ...\n", " https://www.archives.gov/publications/general-...\n", - " [Images, Standard office documents, Structured...\n", + " [{'name': 'Images', 'scheme': 'parse'}, {'name...\n", " [dataProvider]\n", " [US History]\n", " [{'institutionName': 'The U.S. National Archiv...\n", @@ -714,7 +714,7 @@ " []\n", " [\"unknown\"]\n", " no\n", - " [\"https://www.archives.gov/developer#toc-appli...\n", + " {\"api\": \"https://www.archives.gov/developer#to...\n", " [\"none\"]\n", " https://aad.archives.gov/aad/help/getting-star...\n", " []\n", @@ -744,9 +744,9 @@ " 2012\n", " NaN\n", " [\"deu\"]\n", - " [1 Humanities and Social Sciences, 104 Linguis...\n", + " [{'name': '1 Humanities and Social Sciences', ...\n", " https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext...\n", - " [Audiovisual data, Standard office documents, ...\n", + " [{'name': 'Audiovisual data', 'scheme': 'parse...\n", " [dataProvider, serviceProvider]\n", " [Australian German, FOLK, German dialects, Pfe...\n", " [{'institutionName': 'Institut für Deutsche Sp...\n", @@ -759,7 +759,7 @@ " []\n", " [\"other\"]\n", " yes\n", - " []\n", + " {}\n", " [\"none\"]\n", " http://agd.ids-mannheim.de/konditionen.shtml\n", " []\n", @@ -789,9 +789,9 @@ " 2011\n", " NaN\n", " [\"eng\"]\n", - " [1 Humanities and Social Sciences, 111 Social ...\n", + " [{'name': '1 Humanities and Social Sciences', ...\n", " https://odum.unc.edu/about/mission-vision/\n", - " [Archived data, Plain text, Raw data, Scientif...\n", + " [{'name': 'Archived data', 'scheme': 'parse'},...\n", " [dataProvider, serviceProvider]\n", " [FAIR, census, demographic survey, demography,...\n", " [{'institutionName': 'Odum Institute for Resea...\n", @@ -804,7 +804,7 @@ " [{\"dataUploadLicenseName\": \"Data Deposit Form\"...\n", " [\"DataVerse\"]\n", " yes\n", - " [\"https://guides.dataverse.org/en/latest/api/n...\n", + " {\"api\": \"https://guides.dataverse.org/en/lates...\n", " [\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"]\n", " https://dataverse.org/best-practices/data-cita...\n", " []\n", @@ -834,9 +834,9 @@ " 1996-10-01\n", " NaN\n", " [\"eng\"]\n", - " [1 Humanities and Social Sciences, 101 Ancient...\n", + " [{'name': '1 Humanities and Social Sciences', ...\n", " https://archaeologydataservice.ac.uk/about/our...\n", - " [Archived data, Audiovisual data, Databases, I...\n", + " [{'name': 'Archived data', 'scheme': 'parse'},...\n", " [dataProvider, serviceProvider]\n", " [FAIR, archaeology, cultural heritage, prehist...\n", " [{'institutionName': 'Arts and Humanities Rese...\n", @@ -849,7 +849,7 @@ " [{\"dataUploadLicenseName\": \"Guidelines for Dep...\n", " [\"other\"]\n", " yes\n", - " [\"https://archaeologydataservice.ac.uk/about/e...\n", + " {\"api\": \"https://archaeologydataservice.ac.uk/...\n", " [\"DOI\"]\n", " https://archaeologydataservice.ac.uk/advice/te...\n", " []\n", @@ -939,11 +939,11 @@ "4 NaN [\"eng\"] \n", "\n", " re3data_subject \\\n", - "0 [1 Humanities and Social Sciences, 111 Social ... \n", - "1 [1 Humanities and Social Sciences, 102 History... \n", - "2 [1 Humanities and Social Sciences, 104 Linguis... \n", - "3 [1 Humanities and Social Sciences, 111 Social ... \n", - "4 [1 Humanities and Social Sciences, 101 Ancient... \n", + "0 [{'name': '1 Humanities and Social Sciences', ... \n", + "1 [{'name': '1 Humanities and Social Sciences', ... \n", + "2 [{'name': '1 Humanities and Social Sciences', ... \n", + "3 [{'name': '1 Humanities and Social Sciences', ... \n", + "4 [{'name': '1 Humanities and Social Sciences', ... \n", "\n", " re3data_missionStatementURL \\\n", "0 NaN \n", @@ -953,11 +953,11 @@ "4 https://archaeologydataservice.ac.uk/about/our... \n", "\n", " re3data_contentType \\\n", - "0 [Databases, Plain text, Scientific and statist... \n", - "1 [Images, Standard office documents, Structured... \n", - "2 [Audiovisual data, Standard office documents, ... \n", - "3 [Archived data, Plain text, Raw data, Scientif... \n", - "4 [Archived data, Audiovisual data, Databases, I... \n", + "0 [{'name': 'Databases', 'scheme': 'parse'}, {'n... \n", + "1 [{'name': 'Images', 'scheme': 'parse'}, {'name... \n", + "2 [{'name': 'Audiovisual data', 'scheme': 'parse... \n", + "3 [{'name': 'Archived data', 'scheme': 'parse'},... \n", + "4 [{'name': 'Archived data', 'scheme': 'parse'},... \n", "\n", " re3data_providerType \\\n", "0 [dataProvider] \n", @@ -1023,11 +1023,11 @@ "4 [{\"dataUploadLicenseName\": \"Guidelines for Dep... [\"other\"] \n", "\n", " re3data_versioning re3data_api \\\n", - "0 NaN [] \n", - "1 no [\"https://www.archives.gov/developer#toc-appli... \n", - "2 yes [] \n", - "3 yes [\"https://guides.dataverse.org/en/latest/api/n... \n", - "4 yes [\"https://archaeologydataservice.ac.uk/about/e... \n", + "0 NaN {} \n", + "1 no {\"api\": \"https://www.archives.gov/developer#to... \n", + "2 yes {} \n", + "3 yes {\"api\": \"https://guides.dataverse.org/en/lates... \n", + "4 yes {\"api\": \"https://archaeologydataservice.ac.uk/... \n", "\n", " re3data_pidSystem \\\n", "0 [\"DOI\"] \n", @@ -1136,13 +1136,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " \n", " \n", @@ -1158,13 +1161,16 @@ " [\"zh\", \"en\"]\n", " 2021-03-25 10:16:18\n", " 2005-12-21 12:44:08\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [bibliographic_references, theses_and_disserta...\n", " [{'name': 'university of hong kong', 'alternat...\n", " []\n", " {\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap...\n", " NaN\n", " yes\n", + " fully_functional\n", + " NaN\n", + " 11850.0\n", " OpenDOAR_175\n", " \n", " \n", @@ -1178,13 +1184,16 @@ " [\"cs\", \"en\", \"hu\", \"ru\"]\n", " 2021-03-25 09:48:31\n", " 2006-01-04 14:59:30\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [unpub_reports_and_working_papers]\n", " [{'name': 'central european university', 'alte...\n", " []\n", " {\"name\": \"eprints\", \"version\": \"2.2.1\"}\n", " http://rss.archives.ceu.hu/perl/oai2\n", " yes\n", + " fully_functional\n", + " NaN\n", + " 164.0\n", " OpenDOAR_64\n", " \n", " \n", @@ -1198,13 +1207,16 @@ " [\"nl\", \"en\", \"fr\", \"de\", \"it\"]\n", " 2021-09-13 13:35:36\n", " 2006-01-04 12:07:07\n", - " [history and archaeology, multidisciplinary, s...\n", + " [\"history and archaeology\", \"multidisciplinary...\n", " [journal_articles, theses_and_dissertations, u...\n", " [{'name': 'european university institute', 'al...\n", " [{\"policy_url\": \"https://www.eui.eu/research/e...\n", " {\"name\": \"dspace\", \"version\": \"5.2\"}\n", " http://cadmus.eui.eu/oai/request\n", " yes\n", + " fully_functional\n", + " 3867.0\n", + " 24869.0\n", " OpenDOAR_151\n", " \n", " \n", @@ -1218,13 +1230,16 @@ " [\"nl\", \"en\", \"fr\", \"de\"]\n", " 2021-04-16 15:23:52\n", " 2006-01-24 15:46:44\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [journal_articles, conference_and_workshop_pap...\n", " [{'name': 'uhasselt', 'alternativeName': 'hass...\n", " []\n", " {\"name\": \"dspace\", \"version\": \"1.7.2\"}\n", " http://doclib.uhasselt.be/dspace-oai/request\n", " yes\n", + " fully_functional\n", + " 0.0\n", + " 27376.0\n", " OpenDOAR_105\n", " \n", " \n", @@ -1238,13 +1253,16 @@ " [\"nl\", \"en\"]\n", " 2021-04-16 15:22:03\n", " 2006-01-13 12:55:13\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [journal_articles, conference_and_workshop_pap...\n", " [{'name': 'university of utrecht', 'alternativ...\n", " []\n", " {\"name\": \"dspace\", \"version\": \"\"}\n", " https://dspace.library.uu.nl/oai/request\n", " yes\n", + " fully_functional\n", + " 1686.0\n", + " 185637.0\n", " OpenDOAR_101\n", " \n", " \n", @@ -1315,12 +1333,12 @@ "3 2006-01-24 15:46:44 \n", "4 2006-01-13 12:55:13 \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 [multidisciplinary] \n", - "1 [multidisciplinary] \n", - "2 [history and archaeology, multidisciplinary, s... \n", - "3 [multidisciplinary] \n", - "4 [multidisciplinary] \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 [\"multidisciplinary\"] \n", + "1 [\"multidisciplinary\"] \n", + "2 [\"history and archaeology\", \"multidisciplinary... \n", + "3 [\"multidisciplinary\"] \n", + "4 [\"multidisciplinary\"] \n", "\n", " OpenDOAR_repository_metadata.content_types \\\n", "0 [bibliographic_references, theses_and_disserta... \n", @@ -1357,12 +1375,33 @@ "3 http://doclib.uhasselt.be/dspace-oai/request \n", "4 https://dspace.library.uu.nl/oai/request \n", "\n", - " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id \n", - "0 yes OpenDOAR_175 \n", - "1 yes OpenDOAR_64 \n", - "2 yes OpenDOAR_151 \n", - "3 yes OpenDOAR_105 \n", - "4 yes OpenDOAR_101 " + " OpenDOAR_system_metadata.publicly_visible \\\n", + "0 yes \n", + "1 yes \n", + "2 yes \n", + "3 yes \n", + "4 yes \n", + "\n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 fully_functional \n", + "1 fully_functional \n", + "2 fully_functional \n", + "3 fully_functional \n", + "4 fully_functional \n", + "\n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 NaN \n", + "1 NaN \n", + "2 3867.0 \n", + "3 0.0 \n", + "4 1686.0 \n", + "\n", + " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \n", + "0 11850.0 OpenDOAR_175 \n", + "1 164.0 OpenDOAR_64 \n", + "2 24869.0 OpenDOAR_151 \n", + "3 27376.0 OpenDOAR_105 \n", + "4 185637.0 OpenDOAR_101 " ] }, "execution_count": 4, @@ -1553,8 +1592,8 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", - " [669, 58]\n", + " [celestial, opendoar]\n", + " [58, 669]\n", " NaN\n", " NaN\n", " NaN\n", @@ -1632,7 +1671,7 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", + " [celestial, opendoar]\n", " [526, 258]\n", " NaN\n", " NaN\n", @@ -1768,8 +1807,8 @@ " TRUE\n", " TRUE\n", " TRUE\n", - " [Climate Service Center 2.0, Helmholtz-Zentrum...\n", - " [http://www.hzg.de/, http://www.klimzug.de/de/...\n", + " [Helmholtz-Zentrum Geesthacht, Climate Service...\n", + " [http://www.hzg.de/, http://www.climateservice...\n", " de\n", " Hamburg\n", " 53.5511\n", @@ -1777,7 +1816,7 @@ " opus\n", " geoname_2_DE\n", " other\n", - " [GE, GF, G1, S1, HD]\n", + " [GE, S1, G1, GF, HD]\n", " 2015-07-02 08:08:31\n", " NaN\n", " NaN\n", @@ -1790,7 +1829,7 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", + " [celestial, opendoar]\n", " [3408, 5881]\n", " NaN\n", " NaN\n", @@ -1998,14 +2037,14 @@ "0 NaN \n", "1 NaN \n", "2 NaN \n", - "3 [Climate Service Center 2.0, Helmholtz-Zentrum... \n", + "3 [Helmholtz-Zentrum Geesthacht, Climate Service... \n", "4 Skidmore College \n", "\n", " roar_organisation_home_page roar_location_country \\\n", "0 NaN fr \n", "1 NaN se \n", "2 NaN pt \n", - "3 [http://www.hzg.de/, http://www.klimzug.de/de/... de \n", + "3 [http://www.hzg.de/, http://www.climateservice... de \n", "4 http://www.skidmore.edu/ us \n", "\n", " roar_location_city roar_location_latitude roar_location_longitude \\\n", @@ -2019,7 +2058,7 @@ "0 hal geoname_2_FR other NaN \n", "1 diva geoname_2_SE other NaN \n", "2 dspace geoname_2_PT other NaN \n", - "3 opus geoname_2_DE other [GE, GF, G1, S1, HD] \n", + "3 opus geoname_2_DE other [GE, S1, G1, GF, HD] \n", "4 bepress geoname_2_US other NaN \n", "\n", " roar_date roar_note roar_suggestions roar_activity_low \\\n", @@ -2051,10 +2090,10 @@ "4 NaN NaN NaN \n", "\n", " roar_registry_name roar_registry_id roar_submit_to \\\n", - "0 [opendoar, celestial] [669, 58] NaN \n", - "1 [opendoar, celestial] [526, 258] NaN \n", + "0 [celestial, opendoar] [58, 669] NaN \n", + "1 [celestial, opendoar] [526, 258] NaN \n", "2 NaN NaN NaN \n", - "3 [opendoar, celestial] [3408, 5881] NaN \n", + "3 [celestial, opendoar] [3408, 5881] NaN \n", "4 celestial 5882 NaN \n", "\n", " roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n", @@ -2268,7 +2307,7 @@ " dspace\n", " geoname_2_UA\n", " other\n", - " [H1, L1, AC, D204, B1, D1, DK, BF, BS, HM, BL,...\n", + " [D204, BS, BL, B1, D901, DK, H1, HM, L1, BR, A...\n", " 2015-07-07 12:38:37\n", " NaN\n", " NaN\n", @@ -2281,7 +2320,7 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", + " [celestial, opendoar]\n", " [3410, 5883]\n", " NaN\n", " NaN\n", @@ -2293,7 +2332,7 @@ " NaN\n", " NaN\n", " NaN\n", - " [russell_group, ivy_league]\n", + " [ivy_league, russell_group]\n", " roar_10013\n", " \n", " \n", @@ -2350,7 +2389,7 @@ "7 NaN NaN dspace geoname_2_UA \n", "\n", " roar_version roar_subjects \\\n", - "7 other [H1, L1, AC, D204, B1, D1, DK, BF, BS, HM, BL,... \n", + "7 other [D204, BS, BL, B1, D901, DK, H1, HM, L1, BR, A... \n", "\n", " roar_date roar_note roar_suggestions roar_activity_low \\\n", "7 2015-07-07 12:38:37 NaN NaN NaN \n", @@ -2362,7 +2401,7 @@ "7 NaN NaN NaN \n", "\n", " roar_fulltexts_rdocs roar_registry_name roar_registry_id roar_submit_to \\\n", - "7 NaN [opendoar, celestial] [3410, 5883] NaN \n", + "7 NaN [celestial, opendoar] [3410, 5883] NaN \n", "\n", " roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n", "7 NaN NaN NaN \n", @@ -2374,7 +2413,7 @@ "7 NaN NaN NaN \n", "\n", " roar_total_deposits roar_association roar_unique_id \n", - "7 NaN [russell_group, ivy_league] roar_10013 " + "7 NaN [ivy_league, russell_group] roar_10013 " ] }, "execution_count": 6, @@ -2752,7 +2791,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -2810,7 +2849,7 @@ "sum 58 58 58 58 58" ] }, - "execution_count": 65, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -2822,7 +2861,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -2880,7 +2919,7 @@ "sum 6 6 6 6 6" ] }, - "execution_count": 64, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -2892,7 +2931,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -2950,7 +2989,7 @@ "sum 518 518 518 518 518" ] }, - "execution_count": 63, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -2962,7 +3001,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -2976,7 +3015,7 @@ "dtype: int64" ] }, - "execution_count": 53, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -2995,7 +3034,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -3136,7 +3175,7 @@ "dedup::03e0704b5690a2dee1861dc3ad3316c9 {roar} " ] }, - "execution_count": 14, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -3150,7 +3189,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -3291,7 +3330,7 @@ "dedup::03e0704b5690a2dee1861dc3ad3316c9 roar " ] }, - "execution_count": 15, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -3303,7 +3342,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -3379,7 +3418,7 @@ "roar 121 121 121 121 121" ] }, - "execution_count": 16, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -3390,7 +3429,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -3557,7 +3596,7 @@ "[287 rows x 6 columns]" ] }, - "execution_count": 17, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -3576,7 +3615,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -3769,7 +3808,7 @@ "[440 rows x 6 columns]" ] }, - "execution_count": 18, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -3793,7 +3832,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -3986,7 +4025,7 @@ "[3890 rows x 6 columns]" ] }, - "execution_count": 19, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -4007,7 +4046,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -4022,7 +4061,7 @@ "dtype: int64" ] }, - "execution_count": 20, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -4033,7 +4072,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -4048,7 +4087,7 @@ "dtype: int64" ] }, - "execution_count": 21, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -4059,7 +4098,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -4068,7 +4107,7 @@ "2191" ] }, - "execution_count": 22, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -4079,7 +4118,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -4088,7 +4127,7 @@ "2191" ] }, - "execution_count": 23, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -4106,7 +4145,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -4225,13 +4264,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " roar_eprintid\n", " roar_rev_number\n", @@ -4417,6 +4459,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 8237\n", " 17\n", " archive\n", @@ -4599,6 +4644,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 2820\n", " 525\n", " archive\n", @@ -4639,7 +4687,7 @@ " TRUE\n", " TRUE\n", " FALSE\n", - " [USU Library, University of Sumatera Utara]\n", + " [University of Sumatera Utara, USU Library]\n", " [http://library.usu.ac.id, http://www.usu.ac.id]\n", " id\n", " Medan\n", @@ -4661,8 +4709,8 @@ " NaN\n", " NaN\n", " NaN\n", - " [roarmap, opendoar, celestial]\n", - " [283, 1717, 2101]\n", + " [roarmap, celestial, opendoar]\n", + " [1717, 2101, 283]\n", " NaN\n", " NaN\n", " NaN\n", @@ -4781,6 +4829,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 9487\n", " 16\n", " archive\n", @@ -4963,6 +5014,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 1241\n", " 583\n", " archive\n", @@ -5025,8 +5079,8 @@ " 0\n", " 0\n", " 0\n", - " [opendoar, celestial]\n", - " [1779, 1627]\n", + " [celestial, opendoar]\n", + " [1627, 1779]\n", " NaN\n", " NaN\n", " NaN\n", @@ -5145,6 +5199,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 15142\n", " 11\n", " archive\n", @@ -5584,12 +5641,12 @@ "3 NaN \n", "4 NaN \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", "\n", " OpenDOAR_repository_metadata.content_types OpenDOAR_organization \\\n", "0 NaN NaN \n", @@ -5612,26 +5669,47 @@ "3 NaN \n", "4 NaN \n", "\n", - " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id roar_eprintid \\\n", - "0 NaN NaN 8237 \n", - "1 NaN NaN 2820 \n", - "2 NaN NaN 9487 \n", - "3 NaN NaN 1241 \n", - "4 NaN NaN 15142 \n", + " OpenDOAR_system_metadata.publicly_visible \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", "\n", - " roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n", - "0 17 archive 5268 NaN NaN \n", - "1 525 archive 65 NaN NaN \n", - "2 16 archive 6458 NaN NaN \n", - "3 583 archive 1 NaN NaN \n", - "4 11 archive 12132 NaN NaN \n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", "\n", - " roar_dir roar_datestamp roar_lastmod \\\n", - "0 disk0/00/00/82/37 2014-05-15 11:23:30 2014-05-19 05:42:47 \n", - "1 disk0/00/00/28/20 2010-07-29 01:40:27 2012-01-19 11:37:49 \n", - "2 disk0/00/00/94/87 2015-05-15 14:03:55 2016-03-21 20:21:02 \n", - "3 disk0/00/00/12/41 2010-01-06 13:45:32 2011-07-18 05:57:23 \n", - "4 disk0/00/01/51/42 2020-08-08 12:35:50 2021-01-25 22:45:10 \n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n", + "0 8237 17 archive 5268 NaN \n", + "1 2820 525 archive 65 NaN \n", + "2 9487 16 archive 6458 NaN \n", + "3 1241 583 archive 1 NaN \n", + "4 15142 11 archive 12132 NaN \n", + "\n", + " roar_source roar_dir roar_datestamp roar_lastmod \\\n", + "0 NaN disk0/00/00/82/37 2014-05-15 11:23:30 2014-05-19 05:42:47 \n", + "1 NaN disk0/00/00/28/20 2010-07-29 01:40:27 2012-01-19 11:37:49 \n", + "2 NaN disk0/00/00/94/87 2015-05-15 14:03:55 2016-03-21 20:21:02 \n", + "3 NaN disk0/00/00/12/41 2010-01-06 13:45:32 2011-07-18 05:57:23 \n", + "4 NaN disk0/00/01/51/42 2020-08-08 12:35:50 2021-01-25 22:45:10 \n", "\n", " roar_status_changed roar_type roar_succeeds roar_commentary \\\n", "0 2014-05-15 11:23:30 institutional NaN NaN \n", @@ -5719,7 +5797,7 @@ "\n", " roar_open_access roar_mandate roar_organisation_title \\\n", "0 FALSE TRUE Università degli Studi di Milano \n", - "1 TRUE FALSE [USU Library, University of Sumatera Utara] \n", + "1 TRUE FALSE [University of Sumatera Utara, USU Library] \n", "2 TRUE FALSE National Research Foundation of South Africa \n", "3 NaN NaN Swansea Metropolitan University \n", "4 FALSE FALSE NaN \n", @@ -5775,9 +5853,9 @@ "\n", " roar_registry_name roar_registry_id \\\n", "0 celestial 1596 \n", - "1 [roarmap, opendoar, celestial] [283, 1717, 2101] \n", + "1 [roarmap, celestial, opendoar] [1717, 2101, 283] \n", "2 roarmap NaN \n", - "3 [opendoar, celestial] [1779, 1627] \n", + "3 [celestial, opendoar] [1627, 1779] \n", "4 opendoar http://v2.sherpa.ac.uk/id/repository/4422 \n", "\n", " roar_submit_to roar_submitted_to_name roar_submitted_to_done \\\n", @@ -5809,7 +5887,7 @@ "4 NaN NaN roar_15142 " ] }, - "execution_count": 24, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -5824,7 +5902,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -5943,13 +6021,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " roar_eprintid\n", " roar_rev_number\n", @@ -6127,13 +6208,16 @@ " [\"es\"]\n", " 2019-10-17 14:34:31\n", " 2010-12-01 11:11:57\n", - " [business and economics, education]\n", + " [\"business and economics\", \"education\"]\n", " [journal_articles, conference_and_workshop_pap...\n", " [{'name': 'escuela de hotelería y turismo de c...\n", " []\n", " {\"name\": \"dspace\", \"version\": \"1.6.2\"}\n", " NaN\n", " yes\n", + " trial\n", + " NaN\n", + " 286.0\n", " OpenDOAR_1996\n", " NaN\n", " NaN\n", @@ -6317,6 +6401,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 2312\n", " 736\n", " archive\n", @@ -6379,8 +6466,8 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", - " [1832, 1149]\n", + " [celestial, opendoar]\n", + " [1149, 1832]\n", " NaN\n", " NaN\n", " NaN\n", @@ -6491,13 +6578,16 @@ " [\"ja\", \"en\"]\n", " 2020-09-09 11:57:56\n", " 2007-10-09 09:09:40\n", - " [technology general]\n", + " [\"technology general\"]\n", " [journal_articles, unpub_reports_and_working_p...\n", " [{'name': 'kitami institute of technology', 'a...\n", " []\n", " {\"name\": \"weko\", \"version\": \"\"}\n", " http://kitami-it.repo.nii.ac.jp/oai\n", " yes\n", + " fully_functional\n", + " 1534.0\n", + " 8681.0\n", " OpenDOAR_1035\n", " NaN\n", " NaN\n", @@ -6681,6 +6771,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 5779\n", " 9\n", " archive\n", @@ -6743,8 +6836,8 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", - " [2545, 5072]\n", + " [celestial, opendoar]\n", + " [5072, 2545]\n", " NaN\n", " NaN\n", " NaN\n", @@ -6863,6 +6956,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 11212\n", " 12\n", " archive\n", @@ -6912,7 +7008,7 @@ " opus\n", " geoname_2_DE\n", " other\n", - " [HB, GE, T1]\n", + " [T1, HB, GE]\n", " 2016-04-28 13:58:38\n", " NaN\n", " please delete ID 5891\n", @@ -6925,7 +7021,7 @@ " NaN\n", " NaN\n", " NaN\n", - " [opendoar, celestial]\n", + " [celestial, opendoar]\n", " [2539, 6112]\n", " NaN\n", " NaN\n", @@ -7316,12 +7412,12 @@ "3 NaN \n", "4 NaN \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 [business and economics, education] \n", - "1 NaN \n", - "2 [technology general] \n", - "3 NaN \n", - "4 NaN \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 [\"business and economics\", \"education\"] \n", + "1 NaN \n", + "2 [\"technology general\"] \n", + "3 NaN \n", + "4 NaN \n", "\n", " OpenDOAR_repository_metadata.content_types \\\n", "0 [journal_articles, conference_and_workshop_pap... \n", @@ -7351,26 +7447,47 @@ "3 NaN \n", "4 NaN \n", "\n", - " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id roar_eprintid \\\n", - "0 yes OpenDOAR_1996 NaN \n", - "1 NaN NaN 2312 \n", - "2 yes OpenDOAR_1035 NaN \n", - "3 NaN NaN 5779 \n", - "4 NaN NaN 11212 \n", + " OpenDOAR_system_metadata.publicly_visible \\\n", + "0 yes \n", + "1 NaN \n", + "2 yes \n", + "3 NaN \n", + "4 NaN \n", "\n", - " roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n", - "0 NaN NaN NaN NaN NaN \n", - "1 736 archive 1 NaN NaN \n", - "2 NaN NaN NaN NaN NaN \n", - "3 9 archive 8 NaN NaN \n", - "4 12 archive 5611 NaN NaN \n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 trial \n", + "1 NaN \n", + "2 fully_functional \n", + "3 NaN \n", + "4 NaN \n", "\n", - " roar_dir roar_datestamp roar_lastmod \\\n", - "0 NaN NaN NaN \n", - "1 disk0/00/00/23/12 2010-01-14 12:10:06 2011-07-18 06:01:08 \n", - "2 NaN NaN NaN \n", - "3 disk0/00/00/57/79 2012-12-12 04:54:20 2012-12-15 02:36:20 \n", - "4 disk0/00/01/12/12 2016-05-04 11:37:14 2016-05-07 01:37:18 \n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 NaN \n", + "1 NaN \n", + "2 1534.0 \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n", + "0 286.0 OpenDOAR_1996 \n", + "1 NaN NaN \n", + "2 8681.0 OpenDOAR_1035 \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n", + "0 NaN NaN NaN NaN NaN \n", + "1 2312 736 archive 1 NaN \n", + "2 NaN NaN NaN NaN NaN \n", + "3 5779 9 archive 8 NaN \n", + "4 11212 12 archive 5611 NaN \n", + "\n", + " roar_source roar_dir roar_datestamp roar_lastmod \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN disk0/00/00/23/12 2010-01-14 12:10:06 2011-07-18 06:01:08 \n", + "2 NaN NaN NaN NaN \n", + "3 NaN disk0/00/00/57/79 2012-12-12 04:54:20 2012-12-15 02:36:20 \n", + "4 NaN disk0/00/01/12/12 2016-05-04 11:37:14 2016-05-07 01:37:18 \n", "\n", " roar_status_changed roar_type roar_succeeds roar_commentary \\\n", "0 NaN NaN NaN NaN \n", @@ -7489,7 +7606,7 @@ "1 other NaN 2005-06-07 12:57:08 NaN \n", "2 NaN NaN NaN NaN \n", "3 other NaN 2012-08-05 15:12:12 NaN \n", - "4 other [HB, GE, T1] 2016-04-28 13:58:38 NaN \n", + "4 other [T1, HB, GE] 2016-04-28 13:58:38 NaN \n", "\n", " roar_suggestions roar_activity_low roar_activity_medium \\\n", "0 NaN NaN NaN \n", @@ -7521,10 +7638,10 @@ "\n", " roar_registry_name roar_registry_id roar_submit_to \\\n", "0 NaN NaN NaN \n", - "1 [opendoar, celestial] [1832, 1149] NaN \n", + "1 [celestial, opendoar] [1149, 1832] NaN \n", "2 NaN NaN NaN \n", - "3 [opendoar, celestial] [2545, 5072] NaN \n", - "4 [opendoar, celestial] [2539, 6112] NaN \n", + "3 [celestial, opendoar] [5072, 2545] NaN \n", + "4 [celestial, opendoar] [2539, 6112] NaN \n", "\n", " roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n", "0 NaN NaN NaN \n", @@ -7555,7 +7672,7 @@ "4 NaN NaN roar_11212 " ] }, - "execution_count": 25, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -7570,7 +7687,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -7689,13 +7806,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " roar_eprintid\n", " roar_rev_number\n", @@ -7873,13 +7993,16 @@ " [\"zh\", \"nl\", \"en\", \"fr\", \"de\", \"it\", \"ja\", \"pt...\n", " 2021-09-13 13:35:44\n", " 2007-10-10 16:16:02\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [journal_articles, conference_and_workshop_pap...\n", " [{'name': 'university of oxford', 'alternative...\n", " [{\"policy_url\": \"https://libguides.bodleian.ox...\n", " {\"name\": \"fedora\", \"version\": \"4.6.2\"}\n", " https://ora.ox.ac.uk/oai2\n", " yes\n", + " fully_functional\n", + " 20.0\n", + " 239671.0\n", " OpenDOAR_1064\n", " NaN\n", " NaN\n", @@ -8055,13 +8178,16 @@ " [\"en\"]\n", " 2021-02-18 18:13:34\n", " 2019-09-28 04:24:47\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [journal_articles, conference_and_workshop_pap...\n", " [{'name': 'georgia southern university', 'alte...\n", " []\n", " {\"name\": \"digital_commons\", \"version\": \"\"}\n", " https://digitalcommons.georgiasouthern.edu/do/oai\n", " yes\n", + " fully_functional\n", + " 26851.0\n", + " 78076.0\n", " OpenDOAR_8648\n", " NaN\n", " NaN\n", @@ -8237,13 +8363,16 @@ " [\"en\", \"ja\"]\n", " 2021-05-21 18:04:32\n", " 2020-07-13 10:09:55\n", - " [science general]\n", + " [\"science general\"]\n", " [journal_articles, conference_and_workshop_pap...\n", " [{'name': 'national institute for materials sc...\n", " []\n", " {\"name\": \"fedora\", \"version\": \"\"}\n", " https://mdr.nims.go.jp/catalog/oai\n", " yes\n", + " fully_functional\n", + " NaN\n", + " NaN\n", " OpenDOAR_9713\n", " NaN\n", " NaN\n", @@ -8419,13 +8548,16 @@ " [\"en\"]\n", " 2021-09-13 13:35:39\n", " 2006-08-04 09:09:20\n", - " [multidisciplinary]\n", + " [\"multidisciplinary\"]\n", " [journal_articles, theses_and_dissertations, u...\n", " [{'name': 'university of maryland', 'alternati...\n", " [{\"policy_url\": \"http://drum.lib.umd.edu/page/...\n", " {\"name\": \"dspace\", \"version\": \"4.1.0\"}\n", " http://drum.lib.umd.edu/oai/request\n", " yes\n", + " fully_functional\n", + " NaN\n", + " 20513.0\n", " OpenDOAR_427\n", " NaN\n", " NaN\n", @@ -8564,9 +8696,9 @@ " NaN\n", " NaN\n", " [\"eng\"]\n", - " [1 Humanities and Social Sciences, 11 Humaniti...\n", + " [{'name': '1 Humanities and Social Sciences', ...\n", " NaN\n", - " [Audiovisual data, Images, Standard office doc...\n", + " [{'name': 'Audiovisual data', 'scheme': 'parse...\n", " [dataProvider]\n", " [multidisciplinary]\n", " [{'institutionName': 'University of Calgary, L...\n", @@ -8579,7 +8711,7 @@ " [{\"dataUploadLicenseName\": \"Submission Policy\"...\n", " [\"DSpace\"]\n", " NaN\n", - " []\n", + " {}\n", " [\"DOI\", \"hdl\"]\n", " NaN\n", " []\n", @@ -8685,6 +8817,9 @@ " NaN\n", " NaN\n", " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", @@ -8962,7 +9097,7 @@ "1 NaN \n", "2 NaN \n", "3 NaN \n", - "4 [1 Humanities and Social Sciences, 11 Humaniti... \n", + "4 [{'name': '1 Humanities and Social Sciences', ... \n", "\n", " re3data_missionStatementURL \\\n", "0 NaN \n", @@ -8976,7 +9111,7 @@ "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", - "4 [Audiovisual data, Images, Standard office doc... [dataProvider] \n", + "4 [{'name': 'Audiovisual data', 'scheme': 'parse... [dataProvider] \n", "\n", " re3data_keyword re3data_institution \\\n", "0 NaN NaN \n", @@ -9025,7 +9160,7 @@ "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", - "4 NaN [] [\"DOI\", \"hdl\"] \n", + "4 NaN {} [\"DOI\", \"hdl\"] \n", "\n", " re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n", "0 NaN NaN NaN \n", @@ -9118,12 +9253,12 @@ "3 2006-08-04 09:09:20 \n", "4 NaN \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 [multidisciplinary] \n", - "1 [multidisciplinary] \n", - "2 [science general] \n", - "3 [multidisciplinary] \n", - "4 NaN \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 [\"multidisciplinary\"] \n", + "1 [\"multidisciplinary\"] \n", + "2 [\"science general\"] \n", + "3 [\"multidisciplinary\"] \n", + "4 NaN \n", "\n", " OpenDOAR_repository_metadata.content_types \\\n", "0 [journal_articles, conference_and_workshop_pap... \n", @@ -9160,47 +9295,68 @@ "3 http://drum.lib.umd.edu/oai/request \n", "4 NaN \n", "\n", - " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id roar_eprintid \\\n", - "0 yes OpenDOAR_1064 NaN \n", - "1 yes OpenDOAR_8648 NaN \n", - "2 yes OpenDOAR_9713 NaN \n", - "3 yes OpenDOAR_427 NaN \n", - "4 NaN NaN NaN \n", + " OpenDOAR_system_metadata.publicly_visible \\\n", + "0 yes \n", + "1 yes \n", + "2 yes \n", + "3 yes \n", + "4 NaN \n", "\n", - " roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n", - "0 NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN NaN \n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 fully_functional \n", + "1 fully_functional \n", + "2 fully_functional \n", + "3 fully_functional \n", + "4 NaN \n", "\n", - " roar_dir roar_datestamp roar_lastmod roar_status_changed roar_type \\\n", - "0 NaN NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN NaN \n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 20.0 \n", + "1 26851.0 \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", "\n", - " roar_succeeds roar_commentary roar_metadata_visibility roar_latitude \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", + " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n", + "0 239671.0 OpenDOAR_1064 \n", + "1 78076.0 OpenDOAR_8648 \n", + "2 NaN OpenDOAR_9713 \n", + "3 20513.0 OpenDOAR_427 \n", + "4 NaN NaN \n", "\n", - " roar_longitude roar_relation_type roar_relation_uri roar_item_issues_id \\\n", - "0 NaN NaN NaN NaN \n", - "1 NaN NaN NaN NaN \n", - "2 NaN NaN NaN NaN \n", - "3 NaN NaN NaN NaN \n", - "4 NaN NaN NaN NaN \n", + " roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n", + "0 NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN \n", "\n", - " roar_item_issues_type roar_item_issues_description \\\n", - "0 NaN NaN \n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", + " roar_source roar_dir roar_datestamp roar_lastmod roar_status_changed \\\n", + "0 NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN \n", + "\n", + " roar_type roar_succeeds roar_commentary roar_metadata_visibility \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " roar_latitude roar_longitude roar_relation_type roar_relation_uri \\\n", + "0 NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN \n", + "2 NaN NaN NaN NaN \n", + "3 NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN \n", + "\n", + " roar_item_issues_id roar_item_issues_type roar_item_issues_description \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", "\n", " roar_item_issues_timestamp roar_item_issues_status \\\n", "0 NaN NaN \n", @@ -9322,7 +9478,7 @@ "4 NaN NaN " ] }, - "execution_count": 26, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -9337,18 +9493,18 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - ":1: PerformanceWarning:\n", + ":1: PerformanceWarning:\n", "\n", "DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", "\n", - ":2: PerformanceWarning:\n", + ":2: PerformanceWarning:\n", "\n", "DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", "\n" @@ -9470,13 +9626,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " roar_eprintid\n", " roar_rev_number\n", @@ -9663,6 +9822,9 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", " [4612, 4649]\n", " [28, 8]\n", " [archive, archive]\n", @@ -9712,7 +9874,7 @@ " [dspace, dspace]\n", " [geoname_2_IN, geoname_2_IN]\n", " [other, other]\n", - " [[TP, TN, TJ, TH, TK, TD, TA], [TA, T1]]\n", + " [[TD, TP, TH, TJ, TK, TN, TA], [T1, TA]]\n", " [2011-12-15 09:01:35, 2012-01-05 12:09:37]\n", " [nan, nan]\n", " [nan, nan]\n", @@ -9846,6 +10008,9 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", " [7943, 8003]\n", " [16, 19]\n", " [archive, archive]\n", @@ -9895,7 +10060,7 @@ " [eprints, eprints]\n", " [geoname_2_IN, geoname_2_IN]\n", " [3.3.15 eps, 3.3.15 eps]\n", - " [[RB, RM], [R1, RZ]]\n", + " [[RB, RM], [RZ, R1]]\n", " [2014-03-07 15:07:45, 2014-03-19 07:05:04]\n", " [The National Institute for Research in Tuberc...\n", " [nan, Please include \"Tuberculosis\" as a Speci...\n", @@ -9908,7 +10073,7 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " [[opendoar, celestial], celestial]\n", + " [[celestial, opendoar], celestial]\n", " [[5410, 2725], 5430]\n", " [nan, nan]\n", " [nan, nan]\n", @@ -10029,6 +10194,9 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", + " [nan, nan, nan]\n", + " [nan, nan, nan]\n", + " [nan, nan, nan]\n", " [2670, 2698, 2741]\n", " [470, 317, 231]\n", " [archive, archive, archive]\n", @@ -10091,7 +10259,7 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", - " [[opendoar, celestial], opendoar, opendoar]\n", + " [[celestial, opendoar], opendoar, opendoar]\n", " [[2426, 1781], 1781, 1807]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -10212,6 +10380,9 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", " [4393, 4394]\n", " [14, 14]\n", " [archive, archive]\n", @@ -10395,6 +10566,9 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", " [1019, 5550]\n", " [526, 9]\n", " [archive, archive]\n", @@ -10457,8 +10631,8 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " [[roarmap, opendoar, celestial], [opendoar, ce...\n", - " [[193, 1456, 1441], [1456, 1441]]\n", + " [[roarmap, celestial, opendoar], [celestial, o...\n", + " [[1441, 193, 1456], [1441, 1456]]\n", " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", @@ -10842,12 +11016,12 @@ "3 [nan, nan] \n", "4 [nan, nan] \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 [nan, nan] \n", - "1 [nan, nan] \n", - "2 [nan, nan, nan] \n", - "3 [nan, nan] \n", - "4 [nan, nan] \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 [nan, nan] \n", + "1 [nan, nan] \n", + "2 [nan, nan, nan] \n", + "3 [nan, nan] \n", + "4 [nan, nan] \n", "\n", " OpenDOAR_repository_metadata.content_types OpenDOAR_organization \\\n", "0 [nan, nan] [nan, nan] \n", @@ -10870,12 +11044,33 @@ "3 [nan, nan] \n", "4 [nan, nan] \n", "\n", - " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id \\\n", - "0 [nan, nan] [nan, nan] \n", - "1 [nan, nan] [nan, nan] \n", - "2 [nan, nan, nan] [nan, nan, nan] \n", - "3 [nan, nan] [nan, nan] \n", - "4 [nan, nan] [nan, nan] \n", + " OpenDOAR_system_metadata.publicly_visible \\\n", + "0 [nan, nan] \n", + "1 [nan, nan] \n", + "2 [nan, nan, nan] \n", + "3 [nan, nan] \n", + "4 [nan, nan] \n", + "\n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 [nan, nan] \n", + "1 [nan, nan] \n", + "2 [nan, nan, nan] \n", + "3 [nan, nan] \n", + "4 [nan, nan] \n", + "\n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 [nan, nan] \n", + "1 [nan, nan] \n", + "2 [nan, nan, nan] \n", + "3 [nan, nan] \n", + "4 [nan, nan] \n", + "\n", + " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n", + "0 [nan, nan] [nan, nan] \n", + "1 [nan, nan] [nan, nan] \n", + "2 [nan, nan, nan] [nan, nan, nan] \n", + "3 [nan, nan] [nan, nan] \n", + "4 [nan, nan] [nan, nan] \n", "\n", " roar_eprintid roar_rev_number roar_eprint_status \\\n", "0 [4612, 4649] [28, 8] [archive, archive] \n", @@ -11060,8 +11255,8 @@ "4 [geoname_2_HK, geoname_2_CN] [other, other] \n", "\n", " roar_subjects \\\n", - "0 [[TP, TN, TJ, TH, TK, TD, TA], [TA, T1]] \n", - "1 [[RB, RM], [R1, RZ]] \n", + "0 [[TD, TP, TH, TJ, TK, TN, TA], [T1, TA]] \n", + "1 [[RB, RM], [RZ, R1]] \n", "2 [nan, nan, nan] \n", "3 [nan, nan] \n", "4 [nan, nan] \n", @@ -11110,17 +11305,17 @@ "\n", " roar_registry_name \\\n", "0 [celestial, celestial] \n", - "1 [[opendoar, celestial], celestial] \n", - "2 [[opendoar, celestial], opendoar, opendoar] \n", + "1 [[celestial, opendoar], celestial] \n", + "2 [[celestial, opendoar], opendoar, opendoar] \n", "3 [celestial, celestial] \n", - "4 [[roarmap, opendoar, celestial], [opendoar, ce... \n", + "4 [[roarmap, celestial, opendoar], [celestial, o... \n", "\n", " roar_registry_id roar_submit_to roar_submitted_to_name \\\n", "0 [4790, 4789] [nan, nan] [nan, nan] \n", "1 [[5410, 2725], 5430] [nan, nan] [nan, nan] \n", "2 [[2426, 1781], 1781, 1807] [nan, nan, nan] [nan, nan, nan] \n", "3 [4715, 4715] [nan, nan] [nan, nan] \n", - "4 [[193, 1456, 1441], [1456, 1441]] [nan, nan] [nan, nan] \n", + "4 [[1441, 193, 1456], [1441, 1456]] [nan, nan] [nan, nan] \n", "\n", " roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n", "0 [nan, nan] [nan, nan] [nan, nan] \n", @@ -11151,7 +11346,7 @@ "4 [nan, nan] [roar_1019, roar_5550] {roar} " ] }, - "execution_count": 27, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -11164,18 +11359,18 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - ":1: PerformanceWarning:\n", + ":1: PerformanceWarning:\n", "\n", "DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", "\n", - ":2: PerformanceWarning:\n", + ":2: PerformanceWarning:\n", "\n", "DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", "\n" @@ -11297,13 +11492,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " roar_eprintid\n", " roar_rev_number\n", @@ -11482,13 +11680,16 @@ " [nan, [\"es\"], [\"es\"]]\n", " [nan, 2021-09-13 13:35:56, 2021-09-13 13:36:17]\n", " [nan, 2012-02-28 12:12:09, 2019-02-19 10:51:49]\n", - " [nan, [multidisciplinary], [business and econo...\n", + " [nan, [\"multidisciplinary\"], [\"business and ec...\n", " [nan, [journal_articles, theses_and_dissertati...\n", " [nan, [{'name': 'universidad nacional autónoma...\n", " [nan, [{\"policy_url\": \"http://ru.iiec.unam.mx/...\n", " [nan, {\"name\": \"eprints\", \"version\": \"3.3.15\"}...\n", " [nan, http://ru.iiec.unam.mx/cgi/oai2, nan]\n", " [nan, yes, yes]\n", + " [nan, fully_functional, fully_functional]\n", + " [nan, nan, nan]\n", + " [nan, 3066.0, nan]\n", " [nan, OpenDOAR_2429, OpenDOAR_4320]\n", " [4745, nan, nan]\n", " [31, nan, nan]\n", @@ -11539,7 +11740,7 @@ " [eprints, nan, nan]\n", " [geoname_2_MX, nan, nan]\n", " [3.3.15 eps, nan, nan]\n", - " [[GF, HJ, HT, HB, HM, HC, HX, HN, H1, G1, T1, ...\n", + " [[HA, HG, GF, HB, HC, JA, HX, HF, T1, HJ, H1, ...\n", " [2012-02-03 05:18:16, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -11552,7 +11753,7 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", - " [[opendoar, celestial], nan, nan]\n", + " [[celestial, opendoar], nan, nan]\n", " [[2429, 4818], nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -11665,13 +11866,16 @@ " [[\"en\"], [\"en\"], nan]\n", " [2021-09-13 13:36:06, 2021-02-18 18:01:12, nan]\n", " [2014-06-16 13:36:00, 2019-03-26 14:07:30, nan]\n", - " [[multidisciplinary], [multidisciplinary], nan]\n", + " [[\"multidisciplinary\"], [\"multidisciplinary\"],...\n", " [[journal_articles], [journal_articles, biblio...\n", " [[{'name': 'landmark university', 'alternative...\n", " [[{\"policy_url\": \"http://eprints.lmu.edu.ng/po...\n", " [{\"name\": \"eprints\", \"version\": \"3.3.12\"}, {\"n...\n", " [http://eprints.lmu.edu.ng/cgi/oai2, nan, nan]\n", " [yes, yes, nan]\n", + " [fully_functional, fully_functional, nan]\n", + " [nan, nan, nan]\n", + " [507.0, nan, nan]\n", " [OpenDOAR_3087, OpenDOAR_4500, nan]\n", " [nan, nan, 8504]\n", " [nan, nan, 12]\n", @@ -11735,8 +11939,8 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", - " [nan, nan, [opendoar, celestial]]\n", - " [nan, nan, [5621, 3087]]\n", + " [nan, nan, [celestial, opendoar]]\n", + " [nan, nan, [3087, 5621]]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -11848,13 +12052,16 @@ " [[\"pl\"], nan, nan]\n", " [2019-10-17 14:34:36, nan, nan]\n", " [2011-10-11 13:13:58, nan, nan]\n", - " [[multidisciplinary], nan, nan]\n", + " [[\"multidisciplinary\"], nan, nan]\n", " [[journal_articles], nan, nan]\n", " [[{'name': 'iława', 'alternativeName': '', 'co...\n", " [[], nan, nan]\n", " [{\"name\": \"dlibra\", \"version\": \"4\"}, nan, nan]\n", " [http://ibc.ilawa.pl/dlibra/oai-pmh-repository...\n", " [yes, nan, nan]\n", + " [fully_functional, nan, nan]\n", + " [0.0, nan, nan]\n", + " [3397.0, nan, nan]\n", " [OpenDOAR_2318, nan, nan]\n", " [nan, 5503, 4271]\n", " [nan, 9, 11]\n", @@ -11918,8 +12125,8 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", - " [nan, [opendoar, celestial], [opendoar, celest...\n", - " [nan, [2318, 4672], [2318, 4672]]\n", + " [nan, [celestial, opendoar], [celestial, opend...\n", + " [nan, [4672, 2318], [4672, 2318]]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -12031,13 +12238,16 @@ " [nan, nan, [\"pt\"]]\n", " [nan, nan, 2019-10-17 14:34:23]\n", " [nan, nan, 2009-05-01 10:10:47]\n", - " [nan, nan, [education]]\n", + " [nan, nan, [\"education\"]]\n", " [nan, nan, [theses_and_dissertations, unpub_re...\n", " [nan, nan, [{'name': 'ação educativa', 'altern...\n", " [nan, nan, []]\n", " [nan, nan, {\"name\": \"dspace\", \"version\": \"\"}]\n", " [nan, nan, http://www.bdae.org.br/dspace-oai/r...\n", " [nan, nan, yes]\n", + " [nan, nan, fully_functional]\n", + " [nan, nan, 0.0]\n", + " [nan, nan, 2157.0]\n", " [nan, nan, OpenDOAR_1509]\n", " [5711, 126, nan]\n", " [9, 503, nan]\n", @@ -12101,8 +12311,8 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", - " [[opendoar, celestial], [opendoar, celestial],...\n", - " [[1430, 1509], [1430, 1509], nan]\n", + " [[celestial, opendoar], [celestial, opendoar],...\n", + " [[1509, 1430], [1509, 1430], nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -12214,13 +12424,16 @@ " [nan, nan, [\"zh\", \"en\"]]\n", " [nan, nan, 2019-10-17 14:34:36]\n", " [nan, nan, 2011-10-10 13:13:11]\n", - " [nan, nan, [technology general, mechanical eng...\n", + " [nan, nan, [\"technology general\", \"mechanical ...\n", " [nan, nan, [journal_articles, bibliographic_re...\n", " [nan, nan, [{'name': 'chinese academy of scien...\n", " [nan, nan, []]\n", " [nan, nan, {\"name\": \"dspace\", \"version\": \"\"}]\n", " [nan, nan, http://ir.nimte.ac.cn/casirgrid-oai...\n", " [nan, nan, yes]\n", + " [nan, nan, fully_functional]\n", + " [nan, nan, nan]\n", + " [nan, nan, 4443.0]\n", " [nan, nan, OpenDOAR_2306]\n", " [4379, 4266, nan]\n", " [15, 11, nan]\n", @@ -12284,7 +12497,7 @@ " [nan, nan, nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", - " [celestial, [opendoar, celestial], nan]\n", + " [celestial, [celestial, opendoar], nan]\n", " [4668, [4668, 2306], nan]\n", " [nan, nan, nan]\n", " [nan, nan, nan]\n", @@ -12683,12 +12896,12 @@ "3 [nan, nan, 2009-05-01 10:10:47] \n", "4 [nan, nan, 2011-10-10 13:13:11] \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 [nan, [multidisciplinary], [business and econo... \n", - "1 [[multidisciplinary], [multidisciplinary], nan] \n", - "2 [[multidisciplinary], nan, nan] \n", - "3 [nan, nan, [education]] \n", - "4 [nan, nan, [technology general, mechanical eng... \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 [nan, [\"multidisciplinary\"], [\"business and ec... \n", + "1 [[\"multidisciplinary\"], [\"multidisciplinary\"],... \n", + "2 [[\"multidisciplinary\"], nan, nan] \n", + "3 [nan, nan, [\"education\"]] \n", + "4 [nan, nan, [\"technology general\", \"mechanical ... \n", "\n", " OpenDOAR_repository_metadata.content_types \\\n", "0 [nan, [journal_articles, theses_and_dissertati... \n", @@ -12732,6 +12945,27 @@ "3 [nan, nan, yes] \n", "4 [nan, nan, yes] \n", "\n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 [nan, fully_functional, fully_functional] \n", + "1 [fully_functional, fully_functional, nan] \n", + "2 [fully_functional, nan, nan] \n", + "3 [nan, nan, fully_functional] \n", + "4 [nan, nan, fully_functional] \n", + "\n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 [nan, nan, nan] \n", + "1 [nan, nan, nan] \n", + "2 [0.0, nan, nan] \n", + "3 [nan, nan, 0.0] \n", + "4 [nan, nan, nan] \n", + "\n", + " OpenDOAR_repository_metadata.metadata_record_count \\\n", + "0 [nan, 3066.0, nan] \n", + "1 [507.0, nan, nan] \n", + "2 [3397.0, nan, nan] \n", + "3 [nan, nan, 2157.0] \n", + "4 [nan, nan, 4443.0] \n", + "\n", " OpenDOAR_unique_id roar_eprintid roar_rev_number \\\n", "0 [nan, OpenDOAR_2429, OpenDOAR_4320] [4745, nan, nan] [31, nan, nan] \n", "1 [OpenDOAR_3087, OpenDOAR_4500, nan] [nan, nan, 8504] [nan, nan, 12] \n", @@ -12908,7 +13142,7 @@ "4 [geoname_2_CN, geoname_2_CN, nan] [other, other, nan] \n", "\n", " roar_subjects \\\n", - "0 [[GF, HJ, HT, HB, HM, HC, HX, HN, H1, G1, T1, ... \n", + "0 [[HA, HG, GF, HB, HC, JA, HX, HF, T1, HJ, H1, ... \n", "1 [nan, nan, nan] \n", "2 [nan, nan, nan] \n", "3 [nan, nan, nan] \n", @@ -12943,17 +13177,17 @@ "4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n", "\n", " roar_fulltexts_rdocs roar_registry_name \\\n", - "0 [nan, nan, nan] [[opendoar, celestial], nan, nan] \n", - "1 [nan, nan, nan] [nan, nan, [opendoar, celestial]] \n", - "2 [nan, nan, nan] [nan, [opendoar, celestial], [opendoar, celest... \n", - "3 [nan, nan, nan] [[opendoar, celestial], [opendoar, celestial],... \n", - "4 [nan, nan, nan] [celestial, [opendoar, celestial], nan] \n", + "0 [nan, nan, nan] [[celestial, opendoar], nan, nan] \n", + "1 [nan, nan, nan] [nan, nan, [celestial, opendoar]] \n", + "2 [nan, nan, nan] [nan, [celestial, opendoar], [celestial, opend... \n", + "3 [nan, nan, nan] [[celestial, opendoar], [celestial, opendoar],... \n", + "4 [nan, nan, nan] [celestial, [celestial, opendoar], nan] \n", "\n", " roar_registry_id roar_submit_to roar_submitted_to_name \\\n", "0 [[2429, 4818], nan, nan] [nan, nan, nan] [nan, nan, nan] \n", - "1 [nan, nan, [5621, 3087]] [nan, nan, nan] [nan, nan, nan] \n", - "2 [nan, [2318, 4672], [2318, 4672]] [nan, nan, nan] [nan, nan, nan] \n", - "3 [[1430, 1509], [1430, 1509], nan] [nan, nan, nan] [nan, nan, nan] \n", + "1 [nan, nan, [3087, 5621]] [nan, nan, nan] [nan, nan, nan] \n", + "2 [nan, [4672, 2318], [4672, 2318]] [nan, nan, nan] [nan, nan, nan] \n", + "3 [[1509, 1430], [1509, 1430], nan] [nan, nan, nan] [nan, nan, nan] \n", "4 [4668, [4668, 2306], nan] [nan, nan, nan] [nan, nan, nan] \n", "\n", " roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n", @@ -12985,7 +13219,7 @@ "4 [nan, nan, nan] [roar_4379, roar_4266, nan] {roar, OpenDOAR} " ] }, - "execution_count": 28, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -12998,18 +13232,18 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - ":1: PerformanceWarning:\n", + ":1: PerformanceWarning:\n", "\n", "DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", "\n", - ":2: PerformanceWarning:\n", + ":2: PerformanceWarning:\n", "\n", "DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n", "\n" @@ -13131,13 +13365,16 @@ " OpenDOAR_repository_metadata.content_languages\n", " OpenDOAR_system_metadata.date_modified\n", " OpenDOAR_system_metadata.date_created\n", - " OpenDOAR_repository_metadata.content_subjects_phrases\n", + " OpenDOAR_repository_metadata.content_subjects\n", " OpenDOAR_repository_metadata.content_types\n", " OpenDOAR_organization\n", " OpenDOAR_policy_urls\n", " OpenDOAR_repository_metadata.software\n", " OpenDOAR_repository_metadata.oai_url\n", " OpenDOAR_system_metadata.publicly_visible\n", + " OpenDOAR_repository_metadata.repository_status\n", + " OpenDOAR_repository_metadata.fulltext_record_count\n", + " OpenDOAR_repository_metadata.metadata_record_count\n", " OpenDOAR_unique_id\n", " roar_eprintid\n", " roar_rev_number\n", @@ -13279,9 +13516,9 @@ " [2014, nan]\n", " [nan, nan]\n", " [[\"eng\"], nan]\n", - " [[2 Life Sciences, 201 Basic Biological and Me...\n", + " [[{'name': '2 Life Sciences', 'scheme': 'DFG'}...\n", " [nan, nan]\n", - " [[Images, Structured text], nan]\n", + " [[{'name': 'Images', 'scheme': 'parse'}, {'nam...\n", " [[dataProvider], nan]\n", " [[genomes, life sciences, proteins, proteomes,...\n", " [[{'institutionName': 'Georgetown University, ...\n", @@ -13294,7 +13531,7 @@ " [[], nan]\n", " [[\"unknown\"], nan]\n", " [yes, nan]\n", - " [[\"ftp://ftp.pir.georgetown.edu/databases/\", \"...\n", + " [{\"api\": \"ftp://ftp.pir.georgetown.edu/databas...\n", " [[\"none\"], nan]\n", " [nan, nan]\n", " [[], nan]\n", @@ -13400,7 +13637,10 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " {FAIRsharing, re3data}\n", + " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", + " {re3data, FAIRsharing}\n", " \n", " \n", " 1\n", @@ -13499,13 +13739,16 @@ " [[\"uk\", \"en\"], nan]\n", " [2019-10-17 14:34:57, nan]\n", " [2015-07-08 12:43:38, nan]\n", - " [[multidisciplinary], nan]\n", + " [[\"multidisciplinary\"], nan]\n", " [[journal_articles, conference_and_workshop_pa...\n", " [[{'name': 'ukrainian catholic university', 'a...\n", " [[], nan]\n", " [{\"name\": \"dspace\", \"version\": \"\"}, nan]\n", " [nan, nan]\n", " [yes, nan]\n", + " [fully_functional, nan]\n", + " [nan, nan]\n", + " [840.0, nan]\n", " [OpenDOAR_3410, nan]\n", " [nan, 10013]\n", " [nan, 31]\n", @@ -13556,7 +13799,7 @@ " [nan, dspace]\n", " [nan, geoname_2_UA]\n", " [nan, other]\n", - " [nan, [H1, L1, AC, D204, B1, D1, DK, BF, BS, H...\n", + " [nan, [D204, BS, BL, B1, D901, DK, H1, HM, L1,...\n", " [nan, 2015-07-07 12:38:37]\n", " [nan, nan]\n", " [nan, nan]\n", @@ -13569,7 +13812,7 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " [nan, [opendoar, celestial]]\n", + " [nan, [celestial, opendoar]]\n", " [nan, [3410, 5883]]\n", " [nan, nan]\n", " [nan, nan]\n", @@ -13581,7 +13824,7 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " [nan, [russell_group, ivy_league]]\n", + " [nan, [ivy_league, russell_group]]\n", " [nan, roar_10013]\n", " {roar, OpenDOAR}\n", " \n", @@ -13645,9 +13888,9 @@ " [nan, 2012-05-22]\n", " [nan, nan]\n", " [nan, [\"eng\"]]\n", - " [nan, [2 Life Sciences, 201 Basic Biological a...\n", + " [nan, [{'name': '2 Life Sciences', 'scheme': '...\n", " [nan, https://sagebionetworks.org/tools_resour...\n", - " [nan, [Raw data, Scientific and statistical da...\n", + " [nan, [{'name': 'Raw data', 'scheme': 'parse'}...\n", " [nan, [dataProvider, serviceProvider]]\n", " [nan, [AMP-AD Knowledge Portal, DREAM Challeng...\n", " [nan, [{'institutionName': 'Alfred P. Sloan Fo...\n", @@ -13660,7 +13903,7 @@ " [nan, []]\n", " [nan, [\"unknown\"]]\n", " [nan, yes]\n", - " [nan, [\"https://docs.synapse.org/rest/\", \"REST\"]]\n", + " [nan, {\"api\": \"https://docs.synapse.org/rest/\"...\n", " [nan, [\"DOI\"]]\n", " [nan, nan]\n", " [nan, []]\n", @@ -13766,7 +14009,10 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " {FAIRsharing, re3data}\n", + " [nan, nan]\n", + " [nan, nan]\n", + " [nan, nan]\n", + " {re3data, FAIRsharing}\n", " \n", " \n", " 3\n", @@ -13865,13 +14111,16 @@ " [nan, [\"tr\"]]\n", " [nan, 2021-05-21 18:05:06]\n", " [nan, 2020-06-02 09:14:18]\n", - " [nan, [multidisciplinary]]\n", + " [nan, [\"multidisciplinary\"]]\n", " [nan, [journal_articles]]\n", " [nan, [{'name': 'giresun university', 'alterna...\n", " [nan, []]\n", " [nan, {\"name\": \"dspace\", \"version\": \"6.2\"}]\n", " [nan, http://acikerisim.giresun.edu.tr/oai/req...\n", " [nan, yes]\n", + " [nan, fully_functional]\n", + " [nan, nan]\n", + " [nan, nan]\n", " [nan, OpenDOAR_9647]\n", " [16034, nan]\n", " [7, nan]\n", @@ -14048,13 +14297,16 @@ " [nan, [\"eu\", \"fr\", \"es\", \"en\"]]\n", " [nan, 2019-10-17 14:34:21]\n", " [nan, 2009-02-02 13:13:26]\n", - " [nan, [multidisciplinary]]\n", + " [nan, [\"multidisciplinary\"]]\n", " [nan, [journal_articles, books_chapters_and_se...\n", " [nan, [{'name': 'euskomedia', 'alternativeName...\n", " [nan, []]\n", " [nan, {\"name\": \"eprints\", \"version\": \"3.0.5\"}]\n", " [nan, http://hedatuz.euskomedia.org/cgi/oai2]\n", " [nan, yes]\n", + " [nan, technically_malfunctioning]\n", + " [nan, nan]\n", + " [nan, 10570.0]\n", " [nan, OpenDOAR_1426]\n", " [610, nan]\n", " [514, nan]\n", @@ -14118,8 +14370,8 @@ " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", - " [[opendoar, celestial], nan]\n", - " [[1294, 1426], nan]\n", + " [[celestial, opendoar], nan]\n", + " [[1426, 1294], nan]\n", " [nan, nan]\n", " [nan, nan]\n", " [nan, nan]\n", @@ -14462,9 +14714,9 @@ "4 [nan, nan] \n", "\n", " re3data_subject \\\n", - "0 [[2 Life Sciences, 201 Basic Biological and Me... \n", + "0 [[{'name': '2 Life Sciences', 'scheme': 'DFG'}... \n", "1 [nan, nan] \n", - "2 [nan, [2 Life Sciences, 201 Basic Biological a... \n", + "2 [nan, [{'name': '2 Life Sciences', 'scheme': '... \n", "3 [nan, nan] \n", "4 [nan, nan] \n", "\n", @@ -14476,9 +14728,9 @@ "4 [nan, nan] \n", "\n", " re3data_contentType \\\n", - "0 [[Images, Structured text], nan] \n", + "0 [[{'name': 'Images', 'scheme': 'parse'}, {'nam... \n", "1 [nan, nan] \n", - "2 [nan, [Raw data, Scientific and statistical da... \n", + "2 [nan, [{'name': 'Raw data', 'scheme': 'parse'}... \n", "3 [nan, nan] \n", "4 [nan, nan] \n", "\n", @@ -14539,9 +14791,9 @@ "4 [nan, nan] [nan, nan] [nan, nan] \n", "\n", " re3data_api re3data_pidSystem \\\n", - "0 [[\"ftp://ftp.pir.georgetown.edu/databases/\", \"... [[\"none\"], nan] \n", + "0 [{\"api\": \"ftp://ftp.pir.georgetown.edu/databas... [[\"none\"], nan] \n", "1 [nan, nan] [nan, nan] \n", - "2 [nan, [\"https://docs.synapse.org/rest/\", \"REST\"]] [nan, [\"DOI\"]] \n", + "2 [nan, {\"api\": \"https://docs.synapse.org/rest/\"... [nan, [\"DOI\"]] \n", "3 [nan, nan] [nan, nan] \n", "4 [nan, nan] [nan, nan] \n", "\n", @@ -14636,12 +14888,12 @@ "3 [nan, 2020-06-02 09:14:18] \n", "4 [nan, 2009-02-02 13:13:26] \n", "\n", - " OpenDOAR_repository_metadata.content_subjects_phrases \\\n", - "0 [nan, nan] \n", - "1 [[multidisciplinary], nan] \n", - "2 [nan, nan] \n", - "3 [nan, [multidisciplinary]] \n", - "4 [nan, [multidisciplinary]] \n", + " OpenDOAR_repository_metadata.content_subjects \\\n", + "0 [nan, nan] \n", + "1 [[\"multidisciplinary\"], nan] \n", + "2 [nan, nan] \n", + "3 [nan, [\"multidisciplinary\"]] \n", + "4 [nan, [\"multidisciplinary\"]] \n", "\n", " OpenDOAR_repository_metadata.content_types \\\n", "0 [nan, nan] \n", @@ -14671,12 +14923,33 @@ "3 [nan, http://acikerisim.giresun.edu.tr/oai/req... \n", "4 [nan, http://hedatuz.euskomedia.org/cgi/oai2] \n", "\n", - " OpenDOAR_system_metadata.publicly_visible OpenDOAR_unique_id \\\n", - "0 [nan, nan] [nan, nan] \n", - "1 [yes, nan] [OpenDOAR_3410, nan] \n", - "2 [nan, nan] [nan, nan] \n", - "3 [nan, yes] [nan, OpenDOAR_9647] \n", - "4 [nan, yes] [nan, OpenDOAR_1426] \n", + " OpenDOAR_system_metadata.publicly_visible \\\n", + "0 [nan, nan] \n", + "1 [yes, nan] \n", + "2 [nan, nan] \n", + "3 [nan, yes] \n", + "4 [nan, yes] \n", + "\n", + " OpenDOAR_repository_metadata.repository_status \\\n", + "0 [nan, nan] \n", + "1 [fully_functional, nan] \n", + "2 [nan, nan] \n", + "3 [nan, fully_functional] \n", + "4 [nan, technically_malfunctioning] \n", + "\n", + " OpenDOAR_repository_metadata.fulltext_record_count \\\n", + "0 [nan, nan] \n", + "1 [nan, nan] \n", + "2 [nan, nan] \n", + "3 [nan, nan] \n", + "4 [nan, nan] \n", + "\n", + " OpenDOAR_repository_metadata.metadata_record_count OpenDOAR_unique_id \\\n", + "0 [nan, nan] [nan, nan] \n", + "1 [840.0, nan] [OpenDOAR_3410, nan] \n", + "2 [nan, nan] [nan, nan] \n", + "3 [nan, nan] [nan, OpenDOAR_9647] \n", + "4 [nan, 10570.0] [nan, OpenDOAR_1426] \n", "\n", " roar_eprintid roar_rev_number roar_eprint_status roar_userid \\\n", "0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n", @@ -14820,7 +15093,7 @@ "\n", " roar_subjects \\\n", "0 [nan, nan] \n", - "1 [nan, [H1, L1, AC, D204, B1, D1, DK, BF, BS, H... \n", + "1 [nan, [D204, BS, BL, B1, D901, DK, H1, HM, L1,... \n", "2 [nan, nan] \n", "3 [nan, nan] \n", "4 [nan, nan] \n", @@ -14862,10 +15135,10 @@ "\n", " roar_registry_name roar_registry_id \\\n", "0 [nan, nan] [nan, nan] \n", - "1 [nan, [opendoar, celestial]] [nan, [3410, 5883]] \n", + "1 [nan, [celestial, opendoar]] [nan, [3410, 5883]] \n", "2 [nan, nan] [nan, nan] \n", "3 [roarmap, nan] [http://roarmap.eprints.org/1046/, nan] \n", - "4 [[opendoar, celestial], nan] [[1294, 1426], nan] \n", + "4 [[celestial, opendoar], nan] [[1426, 1294], nan] \n", "\n", " roar_submit_to roar_submitted_to_name roar_submitted_to_done \\\n", "0 [nan, nan] [nan, nan] [nan, nan] \n", @@ -14890,20 +15163,20 @@ "\n", " roar_total_deposits roar_association roar_unique_id \\\n", "0 [nan, nan] [nan, nan] [nan, nan] \n", - "1 [nan, nan] [nan, [russell_group, ivy_league]] [nan, roar_10013] \n", + "1 [nan, nan] [nan, [ivy_league, russell_group]] [nan, roar_10013] \n", "2 [nan, nan] [nan, nan] [nan, nan] \n", "3 [nan, nan] [nan, nan] [roar_16034, nan] \n", "4 [nan, nan] [nan, nan] [roar_610, nan] \n", "\n", " source_set \n", - "0 {FAIRsharing, re3data} \n", + "0 {re3data, FAIRsharing} \n", "1 {roar, OpenDOAR} \n", - "2 {FAIRsharing, re3data} \n", + "2 {re3data, FAIRsharing} \n", "3 {roar, OpenDOAR} \n", "4 {roar, OpenDOAR} " ] }, - "execution_count": 29, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -14916,7 +15189,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [