From 722a9aa0cfdc6d53a28f7048fbcc8c28a0a7c65c Mon Sep 17 00:00:00 2001 From: Andrea Mannocci Date: Fri, 8 Oct 2021 14:28:56 +0200 Subject: [PATCH] rewiring subject and geo analysis --- notebooks/02-subjects&geographic.ipynb | 1137 +++++++++++++----------- 1 file changed, 627 insertions(+), 510 deletions(-) diff --git a/notebooks/02-subjects&geographic.ipynb b/notebooks/02-subjects&geographic.ipynb index 29e818e..41a6739 100644 --- a/notebooks/02-subjects&geographic.ipynb +++ b/notebooks/02-subjects&geographic.ipynb @@ -10596,7 +10596,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 69, "metadata": {}, "outputs": [ { @@ -10620,25 +10620,25 @@ " \n", " \n", " \n", - " re3data_id\n", + " orgIdentifier\n", " institution\n", - " org_name\n", - " org_other_names\n", - " org_country\n", - " org_3\n", - " org_noprofit\n", - " org_url\n", - " org_ids\n", - " org_date1\n", - " org_date2\n", - " org_contact\n", + " institutionName\n", + " institutionAdditionalName\n", + " institutionCountry\n", + " responsabilityType\n", + " institutionType\n", + " institutionURL\n", + " institutionIdentifier\n", + " responsibilityStartDate\n", + " responsibilityEndDate\n", + " institutionContact\n", " \n", " \n", " \n", " \n", " 0\n", " r3d100000001\n", - " [Odum Institute for Research in Social Science...\n", + " {'institutionName': 'Odum Institute for Resear...\n", " Odum Institute for Research in Social Science\n", " []\n", " USA\n", @@ -10653,7 +10653,7 @@ " \n", " 1\n", " r3d100000002\n", - " [The U.S. National Archives and Records Admini...\n", + " {'institutionName': 'The U.S. National Archive...\n", " The U.S. National Archives and Records Adminis...\n", " [NARA, National Archives]\n", " USA\n", @@ -10668,7 +10668,7 @@ " \n", " 2\n", " r3d100000002\n", - " [The USA.gov, [], USA, [general], non-profit, ...\n", + " {'institutionName': 'The USA.gov', 'institutio...\n", " The USA.gov\n", " []\n", " USA\n", @@ -10683,7 +10683,7 @@ " \n", " 3\n", " r3d100000004\n", - " [Institut für Deutsche Sprache, Archiv für Ges...\n", + " {'institutionName': 'Institut für Deutsche Spr...\n", " Institut für Deutsche Sprache, Archiv für Gesp...\n", " [AGD]\n", " DEU\n", @@ -10698,7 +10698,7 @@ " \n", " 4\n", " r3d100000005\n", - " [Odum Institute for Research in Social Science...\n", + " {'institutionName': 'Odum Institute for Resear...\n", " Odum Institute for Research in Social Science\n", " []\n", " USA\n", @@ -10715,35 +10715,42 @@ "" ], "text/plain": [ - " re3data_id institution \\\n", - "0 r3d100000001 [Odum Institute for Research in Social Science... \n", - "1 r3d100000002 [The U.S. National Archives and Records Admini... \n", - "2 r3d100000002 [The USA.gov, [], USA, [general], non-profit, ... \n", - "3 r3d100000004 [Institut für Deutsche Sprache, Archiv für Ges... \n", - "4 r3d100000005 [Odum Institute for Research in Social Science... \n", - "\n", - " org_name \\\n", + " orgIdentifier institution \\\n", + "0 r3d100000001 {'institutionName': 'Odum Institute for Resear... \n", + "1 r3d100000002 {'institutionName': 'The U.S. National Archive... \n", + "2 r3d100000002 {'institutionName': 'The USA.gov', 'institutio... \n", + "3 r3d100000004 {'institutionName': 'Institut für Deutsche Spr... \n", + "4 r3d100000005 {'institutionName': 'Odum Institute for Resear... \n", + "\n", + " institutionName \\\n", "0 Odum Institute for Research in Social Science \n", "1 The U.S. National Archives and Records Adminis... \n", "2 The USA.gov \n", "3 Institut für Deutsche Sprache, Archiv für Gesp... \n", "4 Odum Institute for Research in Social Science \n", "\n", - " org_other_names org_country org_3 org_noprofit \\\n", - "0 [] USA [general] non-profit \n", - "1 [NARA, National Archives] USA [general] non-profit \n", - "2 [] USA [general] non-profit \n", - "3 [AGD] DEU [funding, general] non-profit \n", - "4 [] USA [technical] non-profit \n", - "\n", - " org_url org_ids org_date1 org_date2 \\\n", - "0 https://odum.unc.edu/archive/ [] \n", - "1 http://www.archives.gov/ [] \n", - "2 http://www.usa.gov/ [] \n", - "3 http://agd.ids-mannheim.de/index.shtml [] 2004 \n", - "4 https://odum.unc.edu/ [] \n", - "\n", - " org_contact \n", + " institutionAdditionalName institutionCountry responsabilityType \\\n", + "0 [] USA [general] \n", + "1 [NARA, National Archives] USA [general] \n", + "2 [] USA [general] \n", + "3 [AGD] DEU [funding, general] \n", + "4 [] USA [technical] \n", + "\n", + " institutionType institutionURL \\\n", + "0 non-profit https://odum.unc.edu/archive/ \n", + "1 non-profit http://www.archives.gov/ \n", + "2 non-profit http://www.usa.gov/ \n", + "3 non-profit http://agd.ids-mannheim.de/index.shtml \n", + "4 non-profit https://odum.unc.edu/ \n", + "\n", + " institutionIdentifier responsibilityStartDate responsibilityEndDate \\\n", + "0 [] \n", + "1 [] \n", + "2 [] \n", + "3 [] 2004 \n", + "4 [] \n", + "\n", + " institutionContact \n", "0 [] \n", "1 [http://www.archives.gov/contact/] \n", "2 [http://www.usa.gov/Contact.shtml] \n", @@ -10751,33 +10758,30 @@ "4 [https://odum.unc.edu/contact/contact-form/, o... " ] }, - "execution_count": 21, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "re3data_institutions = re3data_df.explode('institution')[['re3data_id', 'institution']]\n", + "re3data_institutions = re3data_df.explode('institution')[['orgIdentifier', 'institution']]\n", "re3data_institutions = re3data_institutions[~re3data_institutions.institution.isna()].reset_index(drop=True)\n", - "re3data_institutions = re3data_institutions.join(pd.DataFrame(re3data_institutions.institution.to_list(), columns=['org_name', 'org_other_names', \n", - " 'org_country', 'org_3', 'org_noprofit', \n", - " 'org_url', 'org_ids', 'org_date1', \n", - " 'org_date2', 'org_contact']))\n", + "re3data_institutions = re3data_institutions.join(pd.json_normalize(re3data_institutions.institution))\n", "re3data_institutions.head()" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ - "re3data_institutions['org_continent'] = re3data_institutions.org_country.map(countrycode_to_continent)" + "re3data_institutions['org_continent'] = re3data_institutions.institutionCountry.map(countrycode_to_continent)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 71, "metadata": {}, "outputs": [ { @@ -10786,13 +10790,13 @@ "array(['AAA', 'EEC'], dtype=object)" ] }, - "execution_count": 23, + "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "re3data_institutions[re3data_institutions.org_continent.isna()].org_country.unique()" + "re3data_institutions[re3data_institutions.org_continent.isna()].institutionCountry.unique()" ] }, { @@ -10805,11 +10809,11 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 72, "metadata": {}, "outputs": [], "source": [ - "re3data_institutions.loc[re3data_institutions.org_country == 'EEC', 'org_continent'] = 'EU'" + "re3data_institutions.loc[re3data_institutions.institutionCountry == 'EEC', 'org_continent'] = 'EU'" ] }, { @@ -10821,7 +10825,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 77, "metadata": {}, "outputs": [ { @@ -10845,159 +10849,138 @@ " \n", " \n", " \n", - " opendoar_id\n", - " institution\n", - " org_name\n", - " org_other_names\n", - " org_country\n", - " org_3\n", - " org_noprofit\n", - " org_url\n", - " org_ids\n", - " org_date1\n", - " org_date2\n", - " org_contact\n", + " system_metadata.id\n", + " organization\n", + " name\n", + " alternativeName\n", + " country\n", + " url\n", + " identifier\n", + " location.latitude\n", + " location.longiture\n", " \n", " \n", " \n", " \n", " 0\n", - " 101\n", - " [university of utrecht, [universiteit utrecht]...\n", - " university of utrecht\n", - " [universiteit utrecht]\n", - " NLD\n", - " []\n", - " \n", - " https://www.uu.nl\n", - " [https://ror.org/04pp8hn57]\n", - " \n", - " \n", - " []\n", + " 175\n", + " {'name': 'university of hong kong', 'alternati...\n", + " university of hong kong\n", + " hku\n", + " CHN\n", + " http://www.hku.hk\n", + " [{'identifier': 'https://ror.org/02zhqgq86', '...\n", + " 22.2824\n", + " 114.138\n", " \n", " \n", " 1\n", - " 115\n", - " [indian institute of management kozhikode, [ii...\n", - " indian institute of management kozhikode\n", - " [iimk]\n", - " IND\n", - " []\n", - " \n", - " http://www.iimk.ac.in/\n", - " [https://ror.org/03m1xdc36]\n", - " \n", - " \n", - " []\n", + " 64\n", + " {'name': 'central european university', 'alter...\n", + " central european university\n", + " ceu\n", + " HUN\n", + " https://www.ceu.edu\n", + " [{'identifier': 'https://ror.org/02zx40v98', '...\n", + " 47.5005\n", + " 19.0494\n", " \n", " \n", " 2\n", - " 41\n", - " [california institute of technology, [caltech]...\n", - " california institute of technology\n", - " [caltech]\n", - " USA\n", - " []\n", - " \n", - " http://www.caltech.edu/\n", - " [https://ror.org/05dxps055]\n", - " \n", - " \n", - " []\n", + " 151\n", + " {'name': 'european university institute', 'alt...\n", + " european university institute\n", + " eui\n", + " ITA\n", + " http://www.eui.eu/\n", + " [{'identifier': 'https://ror.org/0031wrj91', '...\n", + " 43.8017\n", + " 11.286\n", " \n", " \n", " 3\n", - " 119\n", - " [dublin city university, [dcu], ie, [], , http...\n", - " dublin city university\n", - " [dcu]\n", - " IRL\n", - " []\n", - " \n", - " http://www.dcu.ie/\n", - " [https://ror.org/04a1a1e81]\n", - " \n", - " \n", - " []\n", + " 105\n", + " {'name': 'uhasselt', 'alternativeName': 'hasse...\n", + " uhasselt\n", + " hasselt university\n", + " BEL\n", + " https://www.uhasselt.be\n", + " [{'identifier': 'https://ror.org/04nbhqj75', '...\n", + " 50.9253\n", + " 5.39083\n", " \n", " \n", " 4\n", - " 129\n", - " [istituto nazionale di geofisica e vulcanologi...\n", - " istituto nazionale di geofisica e vulcanologia\n", - " [ingv]\n", - " ITA\n", - " []\n", - " \n", - " http://www.ingv.it\n", - " [https://ror.org/00qps9a02]\n", - " \n", + " 101\n", + " {'name': 'university of utrecht', 'alternative...\n", + " university of utrecht\n", " \n", - " []\n", + " NLD\n", + " https://www.uu.nl\n", + " [{'identifier': 'https://ror.org/04pp8hn57', '...\n", + " 52.0902\n", + " 5.1223\n", " \n", " \n", "\n", "" ], "text/plain": [ - " opendoar_id institution \\\n", - "0 101 [university of utrecht, [universiteit utrecht]... \n", - "1 115 [indian institute of management kozhikode, [ii... \n", - "2 41 [california institute of technology, [caltech]... \n", - "3 119 [dublin city university, [dcu], ie, [], , http... \n", - "4 129 [istituto nazionale di geofisica e vulcanologi... \n", - "\n", - " org_name org_other_names \\\n", - "0 university of utrecht [universiteit utrecht] \n", - "1 indian institute of management kozhikode [iimk] \n", - "2 california institute of technology [caltech] \n", - "3 dublin city university [dcu] \n", - "4 istituto nazionale di geofisica e vulcanologia [ingv] \n", - "\n", - " org_country org_3 org_noprofit org_url \\\n", - "0 NLD [] https://www.uu.nl \n", - "1 IND [] http://www.iimk.ac.in/ \n", - "2 USA [] http://www.caltech.edu/ \n", - "3 IRL [] http://www.dcu.ie/ \n", - "4 ITA [] http://www.ingv.it \n", - "\n", - " org_ids org_date1 org_date2 org_contact \n", - "0 [https://ror.org/04pp8hn57] [] \n", - "1 [https://ror.org/03m1xdc36] [] \n", - "2 [https://ror.org/05dxps055] [] \n", - "3 [https://ror.org/04a1a1e81] [] \n", - "4 [https://ror.org/00qps9a02] [] " + " system_metadata.id organization \\\n", + "0 175 {'name': 'university of hong kong', 'alternati... \n", + "1 64 {'name': 'central european university', 'alter... \n", + "2 151 {'name': 'european university institute', 'alt... \n", + "3 105 {'name': 'uhasselt', 'alternativeName': 'hasse... \n", + "4 101 {'name': 'university of utrecht', 'alternative... \n", + "\n", + " name alternativeName country \\\n", + "0 university of hong kong hku CHN \n", + "1 central european university ceu HUN \n", + "2 european university institute eui ITA \n", + "3 uhasselt hasselt university BEL \n", + "4 university of utrecht NLD \n", + "\n", + " url identifier \\\n", + "0 http://www.hku.hk [{'identifier': 'https://ror.org/02zhqgq86', '... \n", + "1 https://www.ceu.edu [{'identifier': 'https://ror.org/02zx40v98', '... \n", + "2 http://www.eui.eu/ [{'identifier': 'https://ror.org/0031wrj91', '... \n", + "3 https://www.uhasselt.be [{'identifier': 'https://ror.org/04nbhqj75', '... \n", + "4 https://www.uu.nl [{'identifier': 'https://ror.org/04pp8hn57', '... \n", + "\n", + " location.latitude location.longiture \n", + "0 22.2824 114.138 \n", + "1 47.5005 19.0494 \n", + "2 43.8017 11.286 \n", + "3 50.9253 5.39083 \n", + "4 52.0902 5.1223 " ] }, - "execution_count": 25, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "opendoar_institutions = opendoar_df.explode('institution')[['opendoar_id', 'institution']]\n", - "opendoar_institutions = opendoar_institutions[~opendoar_institutions.institution.isna()].reset_index(drop=True)\n", - "opendoar_institutions = opendoar_institutions.join(pd.DataFrame(opendoar_institutions.institution.to_list(), columns=['org_name', 'org_other_names', \n", - " 'org_country', 'org_3', 'org_noprofit', \n", - " 'org_url', 'org_ids', 'org_date1', \n", - " 'org_date2', 'org_contact']))\n", - "opendoar_institutions['org_country'] = opendoar_institutions.org_country.map(str.upper, na_action='ignore')\n", - "opendoar_institutions['org_country'] = opendoar_institutions.org_country.map(countrycode_iso2_to_countrycode_iso3, na_action='ignore')\n", + "opendoar_institutions = opendoar_df.explode('organization')[['system_metadata.id', 'organization']]\n", + "opendoar_institutions = opendoar_institutions[~opendoar_institutions.organization.isna()].reset_index(drop=True)\n", + "opendoar_institutions = opendoar_institutions.join(pd.json_normalize(opendoar_institutions.organization))\n", + "opendoar_institutions['country'] = opendoar_institutions.country.map(str.upper, na_action='ignore')\n", + "opendoar_institutions['country'] = opendoar_institutions.country.map(countrycode_iso2_to_countrycode_iso3, na_action='ignore')\n", "opendoar_institutions.head()" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ - "opendoar_institutions['org_continent'] = opendoar_institutions.org_country.map(countrycode_to_continent)" + "opendoar_institutions['org_continent'] = opendoar_institutions.country.map(countrycode_to_continent)" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 79, "metadata": {}, "outputs": [ { @@ -11006,18 +10989,18 @@ "array([nan, 'UMI'], dtype=object)" ] }, - "execution_count": 27, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "opendoar_institutions[opendoar_institutions.org_continent.isna()].org_country.unique()" + "opendoar_institutions[opendoar_institutions.org_continent.isna()].country.unique()" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 80, "metadata": {}, "outputs": [ { @@ -11041,36 +11024,30 @@ " \n", " \n", " \n", - " opendoar_id\n", - " institution\n", - " org_name\n", - " org_other_names\n", - " org_country\n", - " org_3\n", - " org_noprofit\n", - " org_url\n", - " org_ids\n", - " org_date1\n", - " org_date2\n", - " org_contact\n", + " system_metadata.id\n", + " organization\n", + " name\n", + " alternativeName\n", + " country\n", + " url\n", + " identifier\n", + " location.latitude\n", + " location.longiture\n", " org_continent\n", " \n", " \n", " \n", " \n", - " 4349\n", + " 4357\n", " 5379\n", - " [kettering university, [], um, [], , https://w...\n", + " {'name': 'kettering university', 'alternativeN...\n", " kettering university\n", - " []\n", - " UMI\n", - " []\n", " \n", + " UMI\n", " https://www.kettering.edu\n", - " [https://ror.org/03rcspa57]\n", - " \n", - " \n", - " []\n", + " [{'identifier': 'https://ror.org/03rcspa57', '...\n", + " 43.0125\n", + " 83.7125\n", " NA\n", " \n", " \n", @@ -11078,27 +11055,27 @@ "" ], "text/plain": [ - " opendoar_id institution \\\n", - "4349 5379 [kettering university, [], um, [], , https://w... \n", + " system_metadata.id organization \\\n", + "4357 5379 {'name': 'kettering university', 'alternativeN... \n", "\n", - " org_name org_other_names org_country org_3 org_noprofit \\\n", - "4349 kettering university [] UMI [] \n", + " name alternativeName country url \\\n", + "4357 kettering university UMI https://www.kettering.edu \n", "\n", - " org_url org_ids org_date1 \\\n", - "4349 https://www.kettering.edu [https://ror.org/03rcspa57] \n", + " identifier location.latitude \\\n", + "4357 [{'identifier': 'https://ror.org/03rcspa57', '... 43.0125 \n", "\n", - " org_date2 org_contact org_continent \n", - "4349 [] NA " + " location.longiture org_continent \n", + "4357 83.7125 NA " ] }, - "execution_count": 28, + "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "opendoar_institutions.loc[opendoar_institutions.org_country == 'UMI', 'org_continent'] = 'NA'\n", - "opendoar_institutions[opendoar_institutions.org_country == 'UMI']" + "opendoar_institutions.loc[opendoar_institutions.country == 'UMI', 'org_continent'] = 'NA'\n", + "opendoar_institutions[opendoar_institutions.country == 'UMI']" ] }, { @@ -11110,13 +11087,14 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 93, "metadata": {}, "outputs": [], "source": [ - "roar_df['location_country'] = roar_df.location_country.map(str.upper, na_action='ignore')\n", - "roar_df['location_country'] = roar_df.location_country.map(countrycode_iso2_to_countrycode_iso3)\n", - "roar_df['continent'] = roar_df.location_country.map(countrycode_to_continent)" + "roar_institutions = roar_df.explode('location_country')\n", + "roar_institutions['location_country'] = roar_institutions.location_country.map(str.upper, na_action='ignore')\n", + "roar_institutions['location_country'] = roar_institutions.location_country.map(countrycode_iso2_to_countrycode_iso3)\n", + "roar_institutions['continent'] = roar_institutions.location_country.map(countrycode_to_continent)" ] }, { @@ -11128,58 +11106,53 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 101, "metadata": {}, "outputs": [], "source": [ - "fairsharing_df['subjects'] = fairsharing_df.subjects.str.split(pat=',')\n", - "fairsharing_df['countries'] = fairsharing_df.countries.str.split(pat=',')\n", - "\n", - "fairsharing_countries = fairsharing_df.explode('countries')\n", - "fairsharing_countries['countrycode'] = fairsharing_countries.countries.map(country_to_countrycode)\n", + "fairsharing_countries = fairsharing_df.explode('attributes.countries')\n", + "fairsharing_countries['countrycode'] = fairsharing_countries['attributes.countries'].map(country_to_countrycode)\n", "fairsharing_countries['continent'] = fairsharing_countries.countrycode.map(countrycode_to_continent)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['European Union', 'Republic of Ireland', 'Worldwide', nan],\n", - " dtype=object)" + "array(['Worldwide', 'European Union', nan], dtype=object)" ] }, - "execution_count": 31, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fairsharing_countries[fairsharing_countries.countrycode.isna()].countries.unique()" + "fairsharing_countries[fairsharing_countries.countrycode.isna()]['attributes.countries'].unique()" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 105, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array(['European Union', 'Republic of Ireland', 'Worldwide', 'Antarctica',\n", - " nan], dtype=object)" + "array(['Worldwide', 'European Union', nan, 'Antarctica'], dtype=object)" ] }, - "execution_count": 32, + "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fairsharing_countries[fairsharing_countries.continent.isna()].countries.unique()" + "fairsharing_countries[fairsharing_countries.continent.isna()]['attributes.countries'].unique()" ] }, { @@ -11191,12 +11164,12 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 106, "metadata": {}, "outputs": [], "source": [ - "fairsharing_countries.loc[fairsharing_countries.countries == 'Republic of Ireland', ['countries', 'countrycode', 'continent']] = ['Ireland', 'IE', 'EU']\n", - "fairsharing_countries.loc[fairsharing_countries.countries == 'European Union', ['countrycode', 'continent']] = ['EU', 'EU']" + "fairsharing_countries.loc[fairsharing_countries['attributes.countries'] == 'Republic of Ireland', ['attributes.countries', 'countrycode', 'continent']] = ['Ireland', 'IE', 'EU']\n", + "fairsharing_countries.loc[fairsharing_countries['attributes.countries'] == 'European Union', ['countrycode', 'continent']] = ['EU', 'EU']" ] }, { @@ -11208,7 +11181,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 107, "metadata": {}, "outputs": [ { @@ -11232,25 +11205,87 @@ " \n", " \n", " \n", - " full_name\n", - " short_name\n", - " fs_url\n", - " url\n", - " countries\n", - " subjects\n", + " id\n", + " type\n", + " attributes.created-at\n", + " attributes.updated-at\n", + " attributes.metadata.doi\n", + " attributes.metadata.name\n", + " attributes.metadata.status\n", + " attributes.metadata.contacts\n", + " attributes.metadata.homepage\n", + " attributes.metadata.identifier\n", + " attributes.metadata.description\n", + " attributes.metadata.support-links\n", + " attributes.metadata.year-creation\n", + " attributes.metadata.data-processes\n", + " attributes.legacy-ids\n", + " attributes.fairsharing-registry\n", + " attributes.record-type\n", + " attributes.subjects\n", + " attributes.domains\n", + " attributes.taxonomies\n", + " attributes.user-defined-tags\n", + " attributes.countries\n", + " attributes.name\n", + " attributes.abbreviation\n", + " attributes.url\n", + " attributes.doi\n", + " attributes.fairsharing-licence\n", + " attributes.description\n", + " attributes.publications\n", + " attributes.licence-links\n", + " attributes.metadata.citations\n", + " attributes.metadata.abbreviation\n", + " attributes.metadata.access-points\n", + " attributes.metadata.associated-tools\n", + " attributes.metadata.deprecation-date\n", + " attributes.metadata.deprecation-reason\n", + " attributes.metadata.tombstone\n", " countrycode\n", " continent\n", " \n", " \n", " \n", " \n", - " 915\n", - " Antabif IPT - AntOBIS IPT - GBIF Belgium\n", + " 782\n", + " 2462\n", + " fairsharing-records\n", + " 2017-06-27T13:30:19.000Z\n", + " 2021-09-30T11:35:28.523Z\n", + " 10.25504/FAIRsharing.ewyejx\n", " Antabif IPT - AntOBIS IPT - GBIF Belgium\n", - " https://fairsharing.org/10.25504/FAIRsharing.e...\n", + " ready\n", + " [{'contact-name': 'Anton Van de Putte', 'conta...\n", " http://ipt.biodiversity.aq/\n", - " Antarctica\n", + " 2462\n", + " The Belgium Biodiversity Platform hosts this d...\n", + " [{'url': 'a.heughebaert@biodiversity.be', 'nam...\n", + " NaN\n", + " NaN\n", + " [biodbcore-000944, bsg-d000944]\n", + " Database\n", + " repository\n", " [Biodiversity, Life Science]\n", + " [Taxonomic classification]\n", + " [All]\n", + " []\n", + " Antarctica\n", + " FAIRsharing record for: Antabif IPT - AntOBIS ...\n", + " None\n", + " https://fairsharing.org/10.25504/FAIRsharing.e...\n", + " 10.25504/FAIRsharing.ewyejx\n", + " https://creativecommons.org/licenses/by-sa/4.0...\n", + " This FAIRsharing record describes: The Belgium...\n", + " []\n", + " [{'licence-name': 'Apache License 2.0', 'licen...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " AQ\n", " NaN\n", " \n", @@ -11259,29 +11294,89 @@ "" ], "text/plain": [ - " full_name \\\n", - "915 Antabif IPT - AntOBIS IPT - GBIF Belgium \n", + " id type attributes.created-at \\\n", + "782 2462 fairsharing-records 2017-06-27T13:30:19.000Z \n", + "\n", + " attributes.updated-at attributes.metadata.doi \\\n", + "782 2021-09-30T11:35:28.523Z 10.25504/FAIRsharing.ewyejx \n", + "\n", + " attributes.metadata.name attributes.metadata.status \\\n", + "782 Antabif IPT - AntOBIS IPT - GBIF Belgium ready \n", + "\n", + " attributes.metadata.contacts \\\n", + "782 [{'contact-name': 'Anton Van de Putte', 'conta... \n", + "\n", + " attributes.metadata.homepage attributes.metadata.identifier \\\n", + "782 http://ipt.biodiversity.aq/ 2462 \n", + "\n", + " attributes.metadata.description \\\n", + "782 The Belgium Biodiversity Platform hosts this d... \n", + "\n", + " attributes.metadata.support-links \\\n", + "782 [{'url': 'a.heughebaert@biodiversity.be', 'nam... \n", + "\n", + " attributes.metadata.year-creation attributes.metadata.data-processes \\\n", + "782 NaN NaN \n", + "\n", + " attributes.legacy-ids attributes.fairsharing-registry \\\n", + "782 [biodbcore-000944, bsg-d000944] Database \n", + "\n", + " attributes.record-type attributes.subjects \\\n", + "782 repository [Biodiversity, Life Science] \n", + "\n", + " attributes.domains attributes.taxonomies \\\n", + "782 [Taxonomic classification] [All] \n", + "\n", + " attributes.user-defined-tags attributes.countries \\\n", + "782 [] Antarctica \n", + "\n", + " attributes.name \\\n", + "782 FAIRsharing record for: Antabif IPT - AntOBIS ... \n", + "\n", + " attributes.abbreviation \\\n", + "782 None \n", + "\n", + " attributes.url \\\n", + "782 https://fairsharing.org/10.25504/FAIRsharing.e... \n", "\n", - " short_name \\\n", - "915 Antabif IPT - AntOBIS IPT - GBIF Belgium \n", + " attributes.doi \\\n", + "782 10.25504/FAIRsharing.ewyejx \n", "\n", - " fs_url \\\n", - "915 https://fairsharing.org/10.25504/FAIRsharing.e... \n", + " attributes.fairsharing-licence \\\n", + "782 https://creativecommons.org/licenses/by-sa/4.0... \n", "\n", - " url countries subjects \\\n", - "915 http://ipt.biodiversity.aq/ Antarctica [Biodiversity, Life Science] \n", + " attributes.description \\\n", + "782 This FAIRsharing record describes: The Belgium... \n", + "\n", + " attributes.publications \\\n", + "782 [] \n", + "\n", + " attributes.licence-links \\\n", + "782 [{'licence-name': 'Apache License 2.0', 'licen... \n", + "\n", + " attributes.metadata.citations attributes.metadata.abbreviation \\\n", + "782 NaN NaN \n", + "\n", + " attributes.metadata.access-points attributes.metadata.associated-tools \\\n", + "782 NaN NaN \n", + "\n", + " attributes.metadata.deprecation-date \\\n", + "782 NaN \n", + "\n", + " attributes.metadata.deprecation-reason attributes.metadata.tombstone \\\n", + "782 NaN NaN \n", "\n", " countrycode continent \n", - "915 AQ NaN " + "782 AQ NaN " ] }, - "execution_count": 34, + "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fairsharing_countries.loc[fairsharing_countries.countries == 'Antarctica', ['countrycode', 'continent']] = ['AQ', np.nan]\n", + "fairsharing_countries.loc[fairsharing_countries['attributes.countries'] == 'Antarctica', ['countrycode', 'continent']] = ['AQ', np.nan]\n", "fairsharing_countries[fairsharing_countries.countrycode == 'AQ']" ] }, @@ -11294,7 +11389,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 114, "metadata": {}, "outputs": [ { @@ -11323,116 +11418,121 @@ "IND", "CHN", "ITA", - "AUT", "NOR", + "AUT", "SWE", "BEL", "DNK", - "POL", "RUS", + "POL", "GRC", "ZAF", + "MEX", "CZE", "IRL", - "MEX", - "BRA", "TWN", + "NZL", "FIN", + "BRA", "PRT", - "NZL", "EST", "KOR", "COL", + "ISR", "SRB", "LTU", "ARG", "SGP", "HUN", - "ISR", - "TUR", "SVN", + "TUR", "ISL", "KEN", - "UKR", + "HKG", + "THA", "ROU", - "IDN", - "LUX", "SVK", - "HKG", - "PER", - "PAK", + "LUX", + "UKR", + "IDN", "LVA", "GRL", - "THA", + "PER", "CHL", + "HRV", + "CYP", + "PAK", + "SDN", "BEN", "PAN", "CMR", - "CYP", - "MKD", - "SDN", - "HRV", "BFA", - "TUN", - "CIV", "GHA", - "SEN", - "PYF", - "PHL", - "NCL", + "MKD", "NAM", - "LBN", "KAZ", - "FJI", + "LBN", + "CIV", + "LKA", + "TUN", + "MWI", + "PHL", + "BIH", + "EGY", "AZE", - "LKA" + "SEN", + "ETH", + "PYF", + "FJI", + "NCL" ], "y": [ - 2690, - 1040, - 573, - 514, + 2980, + 1141, + 603, + 567, + 405, 349, - 294, - 233, - 215, - 118, - 113, - 111, - 88, - 79, - 74, - 62, - 54, - 52, - 39, - 39, + 276, + 236, + 132, + 129, + 127, + 96, + 87, + 76, + 63, + 61, + 60, + 60, + 41, 38, - 33, + 36, 33, 31, - 22, - 21, + 24, + 24, 21, + 20, + 19, + 18, 18, 17, 16, - 15, - 15, - 15, 14, 14, 13, + 13, 11, 11, + 10, + 10, 9, - 9, - 9, - 8, 7, 7, 6, 6, + 5, 4, 4, 4, @@ -11445,6 +11545,7 @@ 3, 3, 3, + 3, 2, 2, 2, @@ -11452,7 +11553,9 @@ 2, 2, 2, - 2, + 1, + 1, + 1, 1, 1, 1, @@ -11478,8 +11581,8 @@ "GBR", "DEU", "ESP", - "PER", "TUR", + "PER", "IDN", "FRA", "BRA", @@ -11495,26 +11598,26 @@ "ARG", "NOR", "CHN", - "PRT", "TWN", + "PRT", "MEX", "SWE", "RUS", "AUT", - "HUN", - "ZAF", - "SRB", "KEN", + "SRB", + "ZAF", + "HUN", "KOR", + "BLR", "GRC", "ECU", - "BLR", "CHE", "BEL", "NGA", "IRL", - "CHL", "CZE", + "CHL", "MYS", "FIN", "DZA", @@ -11531,8 +11634,8 @@ "KAZ", "SVN", "MDA", - "SDN", "UGA", + "SDN", "SAU", "NIC", "ZWE", @@ -11542,46 +11645,46 @@ "SLV", "PHL", "URY", - "EST", "EGY", - "SGP", + "EST", "PSE", - "CYP", - "JAM", - "GHA", + "SGP", "ROU", "PAN", - "LVA", - "MKD", + "JAM", + "GHA", + "CYP", "ETH", - "PAK", - "SVK", - "SEN", "HND", + "MKD", + "ARE", + "LVA", + "SEN", + "SVK", + "PAK", "ISL", - "DOM", "LUX", - "MMR", - "ARE", + "DOM", "MAR", + "LBN", + "IRQ", "BWA", "LBY", + "BOL", "GEO", - "LBN", + "MMR", "ZMB", - "IRQ", "ARM", "AZE", "RWA", "LSO", "MOZ", + "CPV", "PRY", "BIH", "TUN", - "BOL", - "FJI", "NAM", - "CPV", + "FJI", "UMI", "VNM", "TTO", @@ -11604,48 +11707,48 @@ "KWT" ], "y": [ - 910, + 913, 682, - 315, + 314, 280, - 175, - 162, - 161, + 176, + 164, + 164, + 160, 156, - 154, - 151, + 152, 148, 141, - 124, + 127, 105, 99, 98, - 97, - 91, + 98, + 89, 75, - 72, + 73, 67, + 62, 60, 60, - 60, - 50, + 51, 50, - 48, + 49, 46, + 45, + 45, 44, 44, - 44, - 43, 41, 38, - 37, + 38, 37, 35, 33, 30, 30, 27, - 26, + 27, 25, 22, 20, @@ -11663,7 +11766,7 @@ 12, 12, 12, - 11, + 12, 11, 11, 11, @@ -11675,7 +11778,7 @@ 9, 8, 8, - 7, + 8, 7, 6, 6, @@ -11685,7 +11788,8 @@ 5, 5, 5, - 4, + 5, + 5, 4, 4, 4, @@ -11699,9 +11803,8 @@ 3, 3, 3, - 2, - 2, - 2, + 3, + 3, 2, 2, 2, @@ -11741,31 +11844,31 @@ "visible": "legendonly", "x": [ "USA", - "GBR", "DEU", + "GBR", "JPN", "ESP", "BRA", - "TUR", "IDN", + "TUR", "PER", - "POL", "IND", + "POL", "COL", "UKR", "FRA", - "CAN", "ITA", "CHN", + "CAN", "AUS", "TWN", "SWE", - "RUS", "ARG", + "RUS", "PRT", "NOR", - "KOR", "MEX", + "KOR", "ZAF", "NLD", "HUN", @@ -11775,149 +11878,155 @@ "BEL", "BLR", "ECU", - "CHE", "KEN", - "CHL", + "CHE", "IRL", "AUT", + "CHL", "VEN", "FIN", "NZL", + "MDA", "ROU", "DNK", + "DZA", "CZE", "IRN", - "MDA", "NGA", - "DZA", + "PHL", "CUB", - "THA", - "SLV", "SDN", - "PHL", + "SLV", + "THA", "ZWE", "EGY", - "LTU", - "BGD", "BGR", - "KAZ", + "BGD", + "LTU", "SVN", + "KAZ", "HKG", + "TZA", "CRI", "SAU", - "TZA", + "UGA", "CYP", - "URY", + "PSE", "NIC", - "UGA", + "URY", "SGP", "HRV", - "PSE", - "GHA", "PAK", - "EST", + "GHA", "AZE", + "PAN", + "EST", "LVA", "DOM", - "MAR", + "JAM", "BWA", "KGZ", - "PAN", - "JAM", + "MAR", + "MKD", + "IRQ", + "LBN", "ETH", - "BIH", "BOL", - "LBN", "NPL", - "MKD", - "IRQ", - "FJI", - "SEN", - "SVK", + "BIH", + "ARM", "LBY", + "NAM", + "SVK", + "MTQ", + "TUN", + "GLP", + "MOZ", + "SEN", "LKA", + "FJI", + "DMA", "LSO", "LUX", - "ISL", - "TUN", - "DMA", - "ARM", - "MOZ", - "NAM", "GEO", + "GUF", + "ISL", + "ARE", "UMI", "WSM", - "ARE", - "PRI", + "ATF", + "ISR", "SYR", - "CMR", "SOM", + "CMR", "RWA", "QAT", + "PYF", "PRK", - "HND", - "MTQ", + "PRI", + "NCL", + "MWI", "MLT", + "HND", "ALB", - "ISR", "AFG" ], "y": [ - 873, - 258, - 258, - 234, - 195, - 181, - 158, - 151, - 149, + 891, + 262, + 261, + 241, + 199, + 185, + 175, + 162, + 154, + 131, + 128, 126, - 122, - 119, - 111, + 118, 101, + 99, 97, - 96, - 91, - 82, - 80, + 97, + 84, + 83, 76, + 71, 68, - 65, + 61, 59, - 56, 53, - 48, - 48, + 53, + 52, 46, 45, + 42, 41, - 39, 38, 37, 35, - 29, - 29, + 31, + 30, 29, 27, - 26, - 26, - 23, + 27, + 27, + 24, 23, 22, + 18, 17, 17, 16, 16, 16, 15, - 14, + 15, 14, 13, 13, 13, - 13, 12, 12, 11, @@ -11926,6 +12035,7 @@ 10, 10, 10, + 10, 9, 9, 8, @@ -11935,13 +12045,12 @@ 7, 7, 7, - 7, 6, 6, 5, 5, 5, - 4, + 5, 4, 4, 4, @@ -11968,6 +12077,12 @@ 2, 2, 2, + 2, + 2, + 2, + 1, + 1, + 1, 1, 1, 1, @@ -12000,37 +12115,37 @@ "NLD", "ITA", "CAN", - "BEL", "ESP", "JPN", + "BEL", "SWE", "CZE", "NOR", - "DNK", "EU", - "AUT", + "DNK", "FIN", - "IE", "AUS", + "AUT", + "IRL", "ISR", "PRT", "HUN", "GRC", "MLT", "LTU", - "ISL", "LUX", + "ISL", + "HRV", "SVK", "MNE", - "HRV", "IND", "POL", - "KOR", "SGP", + "KOR", + "TWN", "ZAF", "RUS", "NZL", - "TWN", "MEX", "BRA", "SAU", @@ -12038,22 +12153,23 @@ "ARG", "TUR", "BGR", - "EST", - "MAR", - "ROU", + "CRI", "UGA", "CYP", - "PAK", + "MAR", + "ROU", + "EST", "THA", - "CRI", - "SLV", - "TGO", + "PAK", "URY", + "SLV", "PAN", + "TGO", "AQ", "NIC", - "NGA", + "HND", "BEN", + "BIH", "CHL", "CMR", "COL", @@ -12061,8 +12177,8 @@ "ETH", "FRO", "GRL", - "HND", "IDN", + "NGA", "ARE", "KEN", "LVA", @@ -12075,48 +12191,48 @@ "ZWE" ], "y": [ - 686, - 248, - 192, - 162, + 693, + 256, + 197, + 165, 114, + 101, 99, - 96, 91, - 86, - 83, - 83, - 80, - 76, - 71, - 69, + 87, + 85, + 82, + 82, + 73, + 68, + 68, 67, - 66, - 64, - 63, + 65, + 62, 62, 62, - 61, - 60, 59, 58, - 53, - 52, - 52, - 52, - 52, - 51, - 51, - 32, + 58, + 57, + 55, + 50, + 49, + 49, + 49, + 49, + 49, + 48, + 34, + 11, 11, - 10, 10, 9, 9, + 9, 8, 8, - 8, - 8, + 7, 6, 3, 3, @@ -12156,6 +12272,7 @@ 1, 1, 1, + 1, 1 ] } @@ -12979,9 +13096,9 @@ } }, "text/html": [ - "