registries_analysis/notebooks/02-subjects&geographic.ipynb

15048 lines
4.1 MiB
Plaintext
Raw Normal View History

2021-07-22 11:03:05 +02:00
{
"cells": [
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 1,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"import reverse_geocoder as rg\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import pycountry_convert\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"# Show every column when a DataFrame is rendered (no column truncation).\n",
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 2,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
"def country_to_countrycode(country):\n",
"    \"\"\"Map a country name to its alpha-3 country code.\n",
"\n",
"    Args:\n",
"        country: Country name passed to\n",
"            ``pycountry_convert.country_name_to_country_alpha3``; may be a\n",
"            missing value (anything ``pd.isna`` reports as missing).\n",
"\n",
"    Returns:\n",
"        The alpha-3 code, or ``np.nan`` when the input is missing or the\n",
"        lookup fails.\n",
"    \"\"\"\n",
"    if pd.isna(country):\n",
"        return np.nan\n",
"    try:\n",
"        return pycountry_convert.country_name_to_country_alpha3(country)\n",
"    except Exception:\n",
"        # Best-effort lookup: a failed conversion becomes NaN. Catching\n",
"        # Exception instead of using a bare except lets KeyboardInterrupt and\n",
"        # SystemExit propagate, so the kernel can still be interrupted.\n",
"        return np.nan\n",
" \n",
"def countrycode_iso2_to_countrycode_iso3(country):\n",
"    \"\"\"Convert an alpha-2 country code to the matching alpha-3 code.\n",
"\n",
"    The conversion goes through the country name: alpha-2 -> name -> alpha-3,\n",
"    using ``pycountry_convert`` for both steps.\n",
"\n",
"    Args:\n",
"        country: Alpha-2 country code; may be a missing value (anything\n",
"            ``pd.isna`` reports as missing).\n",
"\n",
"    Returns:\n",
"        The alpha-3 code, or ``np.nan`` when the input is missing or either\n",
"        lookup step fails.\n",
"    \"\"\"\n",
"    if pd.isna(country):\n",
"        return np.nan\n",
"    try:\n",
"        country_name = pycountry_convert.country_alpha2_to_country_name(country)\n",
"        return pycountry_convert.country_name_to_country_alpha3(country_name)\n",
"    except Exception:\n",
"        # Best-effort lookup: a failed conversion becomes NaN. Catching\n",
"        # Exception instead of using a bare except lets KeyboardInterrupt and\n",
"        # SystemExit propagate, so the kernel can still be interrupted.\n",
"        return np.nan\n",
"\n",
"def countrycode_to_continent(country_code):\n",
"    \"\"\"Map an alpha-3 country code to its continent code.\n",
"\n",
"    The conversion goes alpha-3 -> alpha-2 -> continent code, using\n",
"    ``pycountry_convert`` for both steps.\n",
"\n",
"    Args:\n",
"        country_code: Alpha-3 country code; may be a missing value (anything\n",
"            ``pd.isna`` reports as missing).\n",
"\n",
"    Returns:\n",
"        The continent code, or ``np.nan`` when the input is missing or either\n",
"        lookup step fails.\n",
"    \"\"\"\n",
"    if pd.isna(country_code):\n",
"        return np.nan\n",
"    try:\n",
"        alpha2_code = pycountry_convert.country_alpha3_to_country_alpha2(country_code)\n",
"        return pycountry_convert.country_alpha2_to_continent_code(alpha2_code)\n",
"    except Exception:\n",
"        # Best-effort lookup: a failed conversion becomes NaN. Catching\n",
"        # Exception instead of using a bare except lets KeyboardInterrupt and\n",
"        # SystemExit propagate, so the kernel can still be interrupted.\n",
"        return np.nan"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading datasets"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**re3data**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 3,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orgIdentifier</th>\n",
" <th>repositoryName</th>\n",
" <th>repositoryName.language</th>\n",
" <th>additionalName</th>\n",
" <th>repositoryURL</th>\n",
" <th>repositoryIdentifier</th>\n",
" <th>repositoryContact</th>\n",
" <th>description</th>\n",
" <th>description.language</th>\n",
2021-07-22 11:03:05 +02:00
" <th>type</th>\n",
" <th>size</th>\n",
" <th>startDate</th>\n",
" <th>endDate</th>\n",
" <th>repositoryLanguage</th>\n",
2021-07-22 11:03:05 +02:00
" <th>subject</th>\n",
" <th>missionStatementURL</th>\n",
" <th>contentType</th>\n",
" <th>providerType</th>\n",
2021-07-22 11:03:05 +02:00
" <th>keyword</th>\n",
" <th>institution</th>\n",
" <th>policy</th>\n",
" <th>databaseAccess</th>\n",
" <th>databaseLicense</th>\n",
" <th>dataAccess</th>\n",
" <th>dataLicense</th>\n",
" <th>dataUploadType</th>\n",
" <th>dataUploadLicense</th>\n",
" <th>software</th>\n",
" <th>versioning</th>\n",
" <th>api</th>\n",
" <th>pidSystem</th>\n",
" <th>citationGuidelineURL</th>\n",
" <th>aidSystem</th>\n",
" <th>enhancedPublication</th>\n",
" <th>qualityManagement</th>\n",
" <th>certificate</th>\n",
" <th>metadataStandard</th>\n",
" <th>syndication</th>\n",
" <th>remarks</th>\n",
" <th>entryDate</th>\n",
" <th>lastUpdate</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>Odum Institute Archive Dataverse</td>\n",
" <td>eng</td>\n",
" <td>[]</td>\n",
" <td>https://dataverse.unc.edu/dataverse/odum</td>\n",
" <td>[]</td>\n",
" <td>[\"https://dataverse.unc.edu/dataverse/odum#\", ...</td>\n",
" <td>The Odum Institute Archive Dataverse contains ...</td>\n",
" <td>eng</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"13 dataverses; 3.050 datasets\", \"upd...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[{'name': '1 Humanities and Social Sciences', ...</td>\n",
" <td>NaN</td>\n",
" <td>[{'name': 'Databases', 'scheme': 'parse'}, {'n...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[dataProvider]</td>\n",
" <td>[FAIR, Middle East, crime, demography, economy...</td>\n",
" <td>[{'institutionName': 'Odum Institute for Resea...</td>\n",
" <td>[{\"policyName\": \"Collection Development Policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[{\"databaseLicenseName\": \"CC0\", \"databaseLicen...</td>\n",
" <td>[{\"dataAccessType\": \"embargoed\", \"dataAccessRe...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"DataVerse\"]</td>\n",
" <td>NaN</td>\n",
" <td>{}</td>\n",
" <td>[\"DOI\"]</td>\n",
" <td>NaN</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[\"other\"]</td>\n",
" <td>[{\"metadataStandardName\": \"DDI - Data Document...</td>\n",
" <td>{}</td>\n",
" <td>Odum Dataverse is covered by Thomson Reuters D...</td>\n",
" <td>2013-06-10</td>\n",
" <td>2021-07-06</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>r3d100000002</td>\n",
" <td>Access to Archival Databases</td>\n",
" <td>eng</td>\n",
" <td>[{'additionalName': 'AAD', 'additionalNameLang...</td>\n",
" <td>https://aad.archives.gov/aad/</td>\n",
" <td>[RRID:SCR_010479, RRID:nlx_157752]</td>\n",
" <td>[\"https://www.archives.gov/contact\"]</td>\n",
" <td>You will find in the Access to Archival Databa...</td>\n",
" <td>eng</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"\", \"updatedp\": \"\"}</td>\n",
" <td>1985</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\", \"spa\"]</td>\n",
" <td>[{'name': '1 Humanities and Social Sciences', ...</td>\n",
" <td>https://www.archives.gov/publications/general-...</td>\n",
" <td>[{'name': 'Images', 'scheme': 'parse'}, {'name...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[dataProvider]</td>\n",
" <td>[US History]</td>\n",
" <td>[{'institutionName': 'The U.S. National Archiv...</td>\n",
" <td>[{\"policyName\": \"Contribution Policy\", \"policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"Copyrights\", \"dataLicens...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"unknown\"]</td>\n",
" <td>no</td>\n",
" <td>{\"api\": \"https://www.archives.gov/developer#to...</td>\n",
" <td>[\"none\"]</td>\n",
" <td>https://aad.archives.gov/aad/help/getting-star...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>unknown</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>{\"syndication\": \"http://www.archives.gov/socia...</td>\n",
" <td>NaN</td>\n",
" <td>2012-07-04</td>\n",
" <td>2021-05-25</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>r3d100000004</td>\n",
" <td>Datenbank Gesprochenes Deutsch</td>\n",
" <td>deu</td>\n",
" <td>[{'additionalName': 'DGD', 'additionalNameLang...</td>\n",
" <td>https://dgd.ids-mannheim.de/</td>\n",
" <td>[]</td>\n",
" <td>[\"dgd@ids-mannheim.de\"]</td>\n",
" <td>The \"Database for Spoken German (DGD)\" is a co...</td>\n",
" <td>eng</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"34 corpora\", \"updatedp\": \"2020-02-03\"}</td>\n",
" <td>2012</td>\n",
" <td>NaN</td>\n",
" <td>[\"deu\"]</td>\n",
" <td>[{'name': '1 Humanities and Social Sciences', ...</td>\n",
" <td>https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext...</td>\n",
" <td>[{'name': 'Audiovisual data', 'scheme': 'parse...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[dataProvider, serviceProvider]</td>\n",
" <td>[Australian German, FOLK, German dialects, Pfe...</td>\n",
" <td>[{'institutionName': 'Institut für Deutsche Sp...</td>\n",
" <td>[{\"policyName\": \"Erfurter Aufruf zur Sicherung...</td>\n",
" <td>{\"databaseAccessType\": \"restricted\", \"databas...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"restricted\", \"dataAccessR...</td>\n",
" <td>[{\"dataLicenseName\": \"other\", \"dataLicenseURL\"...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"other\"]</td>\n",
" <td>yes</td>\n",
" <td>{}</td>\n",
" <td>[\"none\"]</td>\n",
" <td>http://agd.ids-mannheim.de/konditionen.shtml</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>unknown</td>\n",
" <td>[\"RatSWD\"]</td>\n",
" <td>[]</td>\n",
" <td>{}</td>\n",
" <td>NaN</td>\n",
" <td>2012-07-20</td>\n",
" <td>2020-08-27</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>r3d100000005</td>\n",
" <td>UNC Dataverse</td>\n",
" <td>eng</td>\n",
" <td>[{'additionalName': 'University of North Carol...</td>\n",
" <td>https://dataverse.unc.edu/</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[FAIRsharing_doi:10.25504/FAIRsharing.pS2p8c]</td>\n",
" <td>[\"https://dataverse.unc.edu/\", \"odumarchive@un...</td>\n",
" <td>UNC Dataverse is an open-source repository sof...</td>\n",
" <td>eng</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[institutional]</td>\n",
" <td>{\"size\": \"186 dataverses; 25.272 studies; 229....</td>\n",
" <td>2011</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[{'name': '1 Humanities and Social Sciences', ...</td>\n",
" <td>https://odum.unc.edu/about/mission-vision/</td>\n",
" <td>[{'name': 'Archived data', 'scheme': 'parse'},...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[dataProvider, serviceProvider]</td>\n",
" <td>[FAIR, census, demographic survey, demography,...</td>\n",
" <td>[{'institutionName': 'Odum Institute for Resea...</td>\n",
" <td>[{\"policyName\": \"Collection Development Policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[{\"dataUploadLicenseName\": \"Data Deposit Form\"...</td>\n",
" <td>[\"DataVerse\"]</td>\n",
" <td>yes</td>\n",
" <td>{\"api\": \"https://guides.dataverse.org/en/lates...</td>\n",
" <td>[\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"]</td>\n",
" <td>https://dataverse.org/best-practices/data-cita...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[]</td>\n",
" <td>[{\"metadataStandardName\": \"DDI - Data Document...</td>\n",
" <td>{}</td>\n",
" <td>UNC Dataverse is covered by Clarivate Data Cit...</td>\n",
" <td>2012-07-23</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2021-10-25</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>r3d100000006</td>\n",
" <td>Archaeology Data Service</td>\n",
" <td>eng</td>\n",
" <td>[{'additionalName': 'ADS', 'additionalNameLang...</td>\n",
" <td>https://archaeologydataservice.ac.uk/</td>\n",
" <td>[FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg]</td>\n",
" <td>[\"help@archaeologydataservice.ac.uk\", \"https:/...</td>\n",
" <td>The ADS is an accredited digital repository fo...</td>\n",
" <td>eng</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"1837 results\", \"updatedp\": \"2020-05-...</td>\n",
" <td>1996-10-01</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[{'name': '1 Humanities and Social Sciences', ...</td>\n",
" <td>https://archaeologydataservice.ac.uk/about/our...</td>\n",
" <td>[{'name': 'Archived data', 'scheme': 'parse'},...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[dataProvider, serviceProvider]</td>\n",
" <td>[FAIR, archaeology, cultural heritage, prehist...</td>\n",
" <td>[{'institutionName': 'Arts and Humanities Rese...</td>\n",
" <td>[{\"policyName\": \"ADS Guides to good practice\",...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[{\"databaseLicenseName\": \"CC\", \"databaseLicens...</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[{\"dataUploadLicenseName\": \"Guidelines for Dep...</td>\n",
" <td>[\"other\"]</td>\n",
" <td>yes</td>\n",
" <td>{\"api\": \"https://archaeologydataservice.ac.uk/...</td>\n",
" <td>[\"DOI\"]</td>\n",
" <td>https://archaeologydataservice.ac.uk/advice/te...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[\"other\"]</td>\n",
" <td>[{\"metadataStandardName\": \"DataCite Metadata S...</td>\n",
" <td>{\"syndication\": \"https://archaeologydataservic...</td>\n",
" <td>ADS is covered by Clarivate Data Citation Inde...</td>\n",
" <td>2012-07-23</td>\n",
" <td>2021-09-02</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" orgIdentifier repositoryName repositoryName.language \\\n",
"0 r3d100000001 Odum Institute Archive Dataverse eng \n",
"1 r3d100000002 Access to Archival Databases eng \n",
"2 r3d100000004 Datenbank Gesprochenes Deutsch deu \n",
"3 r3d100000005 UNC Dataverse eng \n",
"4 r3d100000006 Archaeology Data Service eng \n",
"\n",
" additionalName \\\n",
"0 [] \n",
"1 [{'additionalName': 'AAD', 'additionalNameLang... \n",
"2 [{'additionalName': 'DGD', 'additionalNameLang... \n",
"3 [{'additionalName': 'University of North Carol... \n",
"4 [{'additionalName': 'ADS', 'additionalNameLang... \n",
"\n",
" repositoryURL \\\n",
"0 https://dataverse.unc.edu/dataverse/odum \n",
"1 https://aad.archives.gov/aad/ \n",
"2 https://dgd.ids-mannheim.de/ \n",
"3 https://dataverse.unc.edu/ \n",
"4 https://archaeologydataservice.ac.uk/ \n",
"\n",
" repositoryIdentifier \\\n",
"0 [] \n",
"1 [RRID:SCR_010479, RRID:nlx_157752] \n",
"2 [] \n",
2022-02-14 13:34:42 +01:00
"3 [FAIRsharing_doi:10.25504/FAIRsharing.pS2p8c] \n",
"4 [FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] \n",
"\n",
" repositoryContact \\\n",
"0 [\"https://dataverse.unc.edu/dataverse/odum#\", ... \n",
"1 [\"https://www.archives.gov/contact\"] \n",
"2 [\"dgd@ids-mannheim.de\"] \n",
"3 [\"https://dataverse.unc.edu/\", \"odumarchive@un... \n",
"4 [\"help@archaeologydataservice.ac.uk\", \"https:/... \n",
"\n",
" description description.language \\\n",
"0 The Odum Institute Archive Dataverse contains ... eng \n",
"1 You will find in the Access to Archival Databa... eng \n",
"2 The \"Database for Spoken German (DGD)\" is a co... eng \n",
"3 UNC Dataverse is an open-source repository sof... eng \n",
"4 The ADS is an accredited digital repository fo... eng \n",
"\n",
" type size \\\n",
"0 [disciplinary] {\"size\": \"13 dataverses; 3.050 datasets\", \"upd... \n",
"1 [disciplinary] {\"size\": \"\", \"updatedp\": \"\"} \n",
"2 [disciplinary] {\"size\": \"34 corpora\", \"updatedp\": \"2020-02-03\"} \n",
"3 [institutional] {\"size\": \"186 dataverses; 25.272 studies; 229.... \n",
"4 [disciplinary] {\"size\": \"1837 results\", \"updatedp\": \"2020-05-... \n",
"\n",
" startDate endDate repositoryLanguage \\\n",
"0 NaN NaN [\"eng\"] \n",
"1 1985 NaN [\"eng\", \"spa\"] \n",
"2 2012 NaN [\"deu\"] \n",
"3 2011 NaN [\"eng\"] \n",
"4 1996-10-01 NaN [\"eng\"] \n",
2021-07-22 11:03:05 +02:00
"\n",
" subject \\\n",
"0 [{'name': '1 Humanities and Social Sciences', ... \n",
"1 [{'name': '1 Humanities and Social Sciences', ... \n",
"2 [{'name': '1 Humanities and Social Sciences', ... \n",
"3 [{'name': '1 Humanities and Social Sciences', ... \n",
"4 [{'name': '1 Humanities and Social Sciences', ... \n",
"\n",
" missionStatementURL \\\n",
"0 NaN \n",
"1 https://www.archives.gov/publications/general-... \n",
"2 https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext... \n",
"3 https://odum.unc.edu/about/mission-vision/ \n",
"4 https://archaeologydataservice.ac.uk/about/our... \n",
"\n",
" contentType \\\n",
"0 [{'name': 'Databases', 'scheme': 'parse'}, {'n... \n",
"1 [{'name': 'Images', 'scheme': 'parse'}, {'name... \n",
"2 [{'name': 'Audiovisual data', 'scheme': 'parse... \n",
"3 [{'name': 'Archived data', 'scheme': 'parse'},... \n",
"4 [{'name': 'Archived data', 'scheme': 'parse'},... \n",
2021-07-22 11:03:05 +02:00
"\n",
" providerType \\\n",
2021-07-22 11:03:05 +02:00
"0 [dataProvider] \n",
"1 [dataProvider] \n",
"2 [dataProvider, serviceProvider] \n",
"3 [dataProvider, serviceProvider] \n",
"4 [dataProvider, serviceProvider] \n",
"\n",
" keyword \\\n",
"0 [FAIR, Middle East, crime, demography, economy... \n",
"1 [US History] \n",
"2 [Australian German, FOLK, German dialects, Pfe... \n",
"3 [FAIR, census, demographic survey, demography,... \n",
"4 [FAIR, archaeology, cultural heritage, prehist... \n",
"\n",
" institution \\\n",
"0 [{'institutionName': 'Odum Institute for Resea... \n",
"1 [{'institutionName': 'The U.S. National Archiv... \n",
"2 [{'institutionName': 'Institut für Deutsche Sp... \n",
"3 [{'institutionName': 'Odum Institute for Resea... \n",
"4 [{'institutionName': 'Arts and Humanities Rese... \n",
"\n",
" policy \\\n",
"0 [{\"policyName\": \"Collection Development Policy... \n",
"1 [{\"policyName\": \"Contribution Policy\", \"policy... \n",
"2 [{\"policyName\": \"Erfurter Aufruf zur Sicherung... \n",
"3 [{\"policyName\": \"Collection Development Policy... \n",
"4 [{\"policyName\": \"ADS Guides to good practice\",... \n",
"\n",
" databaseAccess \\\n",
"0 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"1 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"2 {\"databaseAccessType\": \"restricted\", \"databas... \n",
"3 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"4 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"\n",
" databaseLicense \\\n",
"0 [{\"databaseLicenseName\": \"CC0\", \"databaseLicen... \n",
"1 [] \n",
"2 [] \n",
"3 [] \n",
"4 [{\"databaseLicenseName\": \"CC\", \"databaseLicens... \n",
"\n",
" dataAccess \\\n",
"0 [{\"dataAccessType\": \"embargoed\", \"dataAccessRe... \n",
"1 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"2 [{\"dataAccessType\": \"restricted\", \"dataAccessR... \n",
"3 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"4 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"\n",
" dataLicense dataUploadType \\\n",
"0 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"1 [{\"dataLicenseName\": \"Copyrights\", \"dataLicens... restricted \n",
"2 [{\"dataLicenseName\": \"other\", \"dataLicenseURL\"... restricted \n",
"3 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"4 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"\n",
" dataUploadLicense software \\\n",
"0 [] [\"DataVerse\"] \n",
"1 [] [\"unknown\"] \n",
"2 [] [\"other\"] \n",
"3 [{\"dataUploadLicenseName\": \"Data Deposit Form\"... [\"DataVerse\"] \n",
"4 [{\"dataUploadLicenseName\": \"Guidelines for Dep... [\"other\"] \n",
"\n",
" versioning api \\\n",
"0 NaN {} \n",
"1 no {\"api\": \"https://www.archives.gov/developer#to... \n",
"2 yes {} \n",
"3 yes {\"api\": \"https://guides.dataverse.org/en/lates... \n",
"4 yes {\"api\": \"https://archaeologydataservice.ac.uk/... \n",
"\n",
" pidSystem \\\n",
"0 [\"DOI\"] \n",
"1 [\"none\"] \n",
"2 [\"none\"] \n",
"3 [\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"] \n",
"4 [\"DOI\"] \n",
"\n",
" citationGuidelineURL aidSystem \\\n",
"0 NaN [] \n",
"1 https://aad.archives.gov/aad/help/getting-star... [] \n",
"2 http://agd.ids-mannheim.de/konditionen.shtml [] \n",
"3 https://dataverse.org/best-practices/data-cita... [] \n",
"4 https://archaeologydataservice.ac.uk/advice/te... [] \n",
"\n",
" enhancedPublication qualityManagement certificate \\\n",
"0 unknown yes [\"other\"] \n",
"1 unknown unknown [] \n",
"2 unknown unknown [\"RatSWD\"] \n",
"3 unknown yes [] \n",
"4 unknown yes [\"other\"] \n",
"\n",
" metadataStandard \\\n",
"0 [{\"metadataStandardName\": \"DDI - Data Document... \n",
"1 [] \n",
"2 [] \n",
"3 [{\"metadataStandardName\": \"DDI - Data Document... \n",
"4 [{\"metadataStandardName\": \"DataCite Metadata S... \n",
"\n",
" syndication \\\n",
"0 {} \n",
"1 {\"syndication\": \"http://www.archives.gov/socia... \n",
"2 {} \n",
"3 {} \n",
"4 {\"syndication\": \"https://archaeologydataservic... \n",
"\n",
" remarks entryDate lastUpdate \n",
"0 Odum Dataverse is covered by Thomson Reuters D... 2013-06-10 2021-07-06 \n",
"1 NaN 2012-07-04 2021-05-25 \n",
"2 NaN 2012-07-20 2020-08-27 \n",
2022-02-14 13:34:42 +01:00
"3 UNC Dataverse is covered by Clarivate Data Cit... 2012-07-23 2021-10-25 \n",
"4 ADS is covered by Clarivate Data Citation Inde... 2012-07-23 2021-09-02 "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 3,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the raw re3data export (tab-separated). The listed columns are parsed\n",
"# with ast.literal_eval while reading, so they arrive as Python objects\n",
"# rather than strings.\n",
"re3data_df = pd.read_csv(\n",
"    '../data/raw/re3data.tsv',\n",
"    delimiter='\\t',\n",
"    converters=dict.fromkeys(\n",
"        ('subject', 'keyword', 'additionalName', 'repositoryIdentifier',\n",
"         'type', 'contentType', 'providerType', 'institution'),\n",
"        ast.literal_eval,\n",
"    ),\n",
")\n",
"\n",
"re3data_df.head()"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 4,
2021-07-22 11:03:05 +02:00
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orgIdentifier</th>\n",
" <th>repositoryName</th>\n",
" <th>repositoryName.language</th>\n",
" <th>additionalName</th>\n",
" <th>repositoryURL</th>\n",
" <th>repositoryIdentifier</th>\n",
" <th>repositoryContact</th>\n",
" <th>description</th>\n",
" <th>description.language</th>\n",
2021-07-22 11:03:05 +02:00
" <th>type</th>\n",
" <th>size</th>\n",
" <th>startDate</th>\n",
" <th>endDate</th>\n",
" <th>repositoryLanguage</th>\n",
2021-07-22 11:03:05 +02:00
" <th>subject</th>\n",
" <th>missionStatementURL</th>\n",
" <th>contentType</th>\n",
" <th>providerType</th>\n",
2021-07-22 11:03:05 +02:00
" <th>keyword</th>\n",
" <th>institution</th>\n",
" <th>policy</th>\n",
" <th>databaseAccess</th>\n",
" <th>databaseLicense</th>\n",
" <th>dataAccess</th>\n",
" <th>dataLicense</th>\n",
" <th>dataUploadType</th>\n",
" <th>dataUploadLicense</th>\n",
" <th>software</th>\n",
" <th>versioning</th>\n",
" <th>api</th>\n",
" <th>pidSystem</th>\n",
" <th>citationGuidelineURL</th>\n",
" <th>aidSystem</th>\n",
" <th>enhancedPublication</th>\n",
" <th>qualityManagement</th>\n",
" <th>certificate</th>\n",
" <th>metadataStandard</th>\n",
" <th>syndication</th>\n",
" <th>remarks</th>\n",
" <th>entryDate</th>\n",
" <th>lastUpdate</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-02-14 13:34:42 +01:00
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2769</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>1800</td>\n",
" <td>172</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2373</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2778</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>1339</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>1532</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
" <td>1694</td>\n",
" <td>2793</td>\n",
" <td>2793</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-02-14 13:34:42 +01:00
" <td>2793</td>\n",
" <td>2791</td>\n",
" <td>19</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2197</td>\n",
" <td>2766</td>\n",
" <td>1024</td>\n",
" <td>2532</td>\n",
" <td>2792</td>\n",
" <td>6</td>\n",
2021-07-22 11:03:05 +02:00
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1321</td>\n",
" <td>362</td>\n",
" <td>86</td>\n",
" <td>110</td>\n",
" <td>1418</td>\n",
" <td>2304</td>\n",
" <td>1351</td>\n",
" <td>6</td>\n",
" <td>2544</td>\n",
" <td>2773</td>\n",
" <td>2366</td>\n",
" <td>12</td>\n",
2022-02-14 13:34:42 +01:00
" <td>377</td>\n",
" <td>146</td>\n",
" <td>2294</td>\n",
" <td>3</td>\n",
2022-02-14 13:34:42 +01:00
" <td>695</td>\n",
" <td>23</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1170</td>\n",
" <td>29</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1337</td>\n",
" <td>13</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
2022-02-14 13:34:42 +01:00
" <td>16</td>\n",
" <td>175</td>\n",
" <td>544</td>\n",
" <td>1673</td>\n",
" <td>1316</td>\n",
" <td>722</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>r3d100000001</td>\n",
2022-02-14 13:34:42 +01:00
" <td>EarthChem Library</td>\n",
" <td>eng</td>\n",
" <td>[]</td>\n",
" <td>http://icgem.gfz-potsdam.de/home</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>The National Archives and Records Administrati...</td>\n",
" <td>eng</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"\", \"updatedp\": \"\"}</td>\n",
" <td>2008</td>\n",
" <td>2015</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[{'name': '1 Humanities and Social Sciences', ...</td>\n",
" <td>https://learn.scholarsportal.info/all-guides/d...</td>\n",
" <td>[{'name': 'Standard office documents', 'scheme...</td>\n",
" <td>[dataProvider]</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[multidisciplinary]</td>\n",
" <td>[{'institutionName': 'National Center for Biot...</td>\n",
" <td>[][]</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"unknown\"]</td>\n",
" <td>yes</td>\n",
" <td>{}</td>\n",
" <td>[\"none\"]</td>\n",
" <td>https://dataverse.org/best-practices/data-cita...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>{}</td>\n",
" <td>is covered by Elsevier.</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2018-08-10</td>\n",
" <td>2021-09-03</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
2021-07-22 11:03:05 +02:00
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2596</td>\n",
" <td>587</td>\n",
2021-07-22 11:03:05 +02:00
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1769</td>\n",
" <td>170</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2776</td>\n",
" <td>1768</td>\n",
" <td>1472</td>\n",
" <td>93</td>\n",
" <td>12</td>\n",
" <td>2088</td>\n",
" <td>240</td>\n",
" <td>14</td>\n",
2022-02-14 13:34:42 +01:00
" <td>29</td>\n",
" <td>1806</td>\n",
" <td>205</td>\n",
" <td>7</td>\n",
" <td>319</td>\n",
" <td>2624</td>\n",
" <td>2201</td>\n",
" <td>1292</td>\n",
" <td>71</td>\n",
" <td>1851</td>\n",
" <td>2054</td>\n",
" <td>1216</td>\n",
" <td>1131</td>\n",
" <td>1526</td>\n",
" <td>1359</td>\n",
" <td>76</td>\n",
" <td>2199</td>\n",
" <td>1643</td>\n",
" <td>1569</td>\n",
" <td>2557</td>\n",
" <td>1693</td>\n",
" <td>2235</td>\n",
" <td>17</td>\n",
" <td>20</td>\n",
2022-02-14 13:34:42 +01:00
" <td>104</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2022-02-14 13:34:42 +01:00
" orgIdentifier repositoryName repositoryName.language \\\n",
"count 2793 2793 2793 \n",
"unique 2793 2791 19 \n",
"top r3d100000001 EarthChem Library eng \n",
"freq 1 2 2596 \n",
"\n",
" additionalName repositoryURL repositoryIdentifier \\\n",
"count 2793 2769 2793 \n",
"unique 2197 2766 1024 \n",
"top [] http://icgem.gfz-potsdam.de/home [] \n",
"freq 587 2 1769 \n",
"\n",
" repositoryContact description \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 \n",
"unique 2532 2792 \n",
"top [] The National Archives and Records Administrati... \n",
2022-02-14 13:34:42 +01:00
"freq 170 2 \n",
"\n",
" description.language type size \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 2793 \n",
"unique 6 9 1321 \n",
"top eng [disciplinary] {\"size\": \"\", \"updatedp\": \"\"} \n",
2022-02-14 13:34:42 +01:00
"freq 2776 1768 1472 \n",
"\n",
" startDate endDate repositoryLanguage \\\n",
2022-02-14 13:34:42 +01:00
"count 1800 172 2793 \n",
"unique 362 86 110 \n",
"top 2008 2015 [\"eng\"] \n",
2022-02-14 13:34:42 +01:00
"freq 93 12 2088 \n",
"\n",
" subject \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 \n",
"unique 1418 \n",
"top [{'name': '1 Humanities and Social Sciences', ... \n",
2022-02-14 13:34:42 +01:00
"freq 240 \n",
"\n",
" missionStatementURL \\\n",
2022-02-14 13:34:42 +01:00
"count 2373 \n",
"unique 2304 \n",
"top https://learn.scholarsportal.info/all-guides/d... \n",
"freq 14 \n",
"\n",
" contentType providerType \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 \n",
"unique 1351 6 \n",
"top [{'name': 'Standard office documents', 'scheme... [dataProvider] \n",
2022-02-14 13:34:42 +01:00
"freq 29 1806 \n",
"\n",
" keyword \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 \n",
"unique 2544 \n",
"top [multidisciplinary] \n",
2022-02-14 13:34:42 +01:00
"freq 205 \n",
"\n",
" institution policy \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 \n",
"unique 2773 2366 \n",
"top [{'institutionName': 'National Center for Biot... [][] \n",
2022-02-14 13:34:42 +01:00
"freq 7 319 \n",
"\n",
" databaseAccess databaseLicense \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 \n",
"unique 12 377 \n",
"top {\"databaseAccessType\": \"open\", \"databaseAcces... [] \n",
2022-02-14 13:34:42 +01:00
"freq 2624 2201 \n",
"\n",
" dataAccess \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 \n",
"unique 146 \n",
"top [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
2022-02-14 13:34:42 +01:00
"freq 1292 \n",
"\n",
" dataLicense dataUploadType \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2778 \n",
"unique 2294 3 \n",
"top [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
2022-02-14 13:34:42 +01:00
"freq 71 1851 \n",
"\n",
" dataUploadLicense software versioning api pidSystem \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 1339 2793 2793 \n",
"unique 695 23 2 1170 29 \n",
"top [] [\"unknown\"] yes {} [\"none\"] \n",
2022-02-14 13:34:42 +01:00
"freq 2054 1216 1131 1526 1359 \n",
"\n",
" citationGuidelineURL aidSystem \\\n",
2022-02-14 13:34:42 +01:00
"count 1532 2793 \n",
"unique 1337 13 \n",
"top https://dataverse.org/best-practices/data-cita... [] \n",
2022-02-14 13:34:42 +01:00
"freq 76 2199 \n",
"\n",
" enhancedPublication qualityManagement certificate metadataStandard \\\n",
2022-02-14 13:34:42 +01:00
"count 2793 2793 2793 2793 \n",
"unique 3 3 16 175 \n",
"top unknown yes [] [] \n",
2022-02-14 13:34:42 +01:00
"freq 1643 1569 2557 1693 \n",
"\n",
" syndication remarks entryDate lastUpdate \n",
2022-02-14 13:34:42 +01:00
"count 2793 1694 2793 2793 \n",
"unique 544 1673 1316 722 \n",
"top {} is covered by Elsevier. 2018-08-10 2021-09-03 \n",
"freq 2235 17 20 104 "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 4,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-column summary (count / unique / top / freq), non-numeric columns included.\n",
"re3data_df.describe(include=\"all\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**OpenDOAR**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 5,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>system_metadata.id</th>\n",
" <th>repository_metadata.name</th>\n",
" <th>repository_metadata.alternativename</th>\n",
" <th>repository_metadata.url</th>\n",
" <th>repository_metadata.description</th>\n",
" <th>repository_metadata.type</th>\n",
" <th>repository_metadata.content_languages</th>\n",
" <th>system_metadata.date_modified</th>\n",
" <th>system_metadata.date_created</th>\n",
" <th>repository_metadata.content_subjects</th>\n",
" <th>repository_metadata.content_types</th>\n",
" <th>organization</th>\n",
" <th>policy_urls</th>\n",
" <th>repository_metadata.software</th>\n",
" <th>repository_metadata.oai_url</th>\n",
" <th>system_metadata.publicly_visible</th>\n",
" <th>repository_metadata.repository_status</th>\n",
" <th>repository_metadata.fulltext_record_count</th>\n",
" <th>repository_metadata.metadata_record_count</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2022-02-14 13:34:42 +01:00
" <td>134</td>\n",
" <td>{\"name\": \"eldorado - repository of the tu dort...</td>\n",
" <td>[{'name': 'eldorado - ressourcen aus und für l...</td>\n",
" <td>https://eldorado.tu-dortmund.de</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>institutional</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2022-01-12 15:34:54</td>\n",
" <td>2005-12-19 14:57:52</td>\n",
" <td>[arts, humanities, science, mathematics, socia...</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'technische universität dortmund', '...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n",
" <td>https://eldorado.tu-dortmund.de/oai/request</td>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>9629.0</td>\n",
" <td>20963.0</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2022-02-14 13:34:42 +01:00
" <td>58</td>\n",
" <td>{\"name\": \"archive ouverte en sciences de linfo...</td>\n",
" <td>[{'acronym': '@rchivesic'}]</td>\n",
" <td>https://archivesic.ccsd.cnrs.fr</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>institutional</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2022-01-12 15:34:53</td>\n",
" <td>2006-01-13 12:48:32</td>\n",
" <td>[arts, science, technology, engineering, mathe...</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
" <td>[{'name': 'centre pour la communication scient...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"hal\", \"version\": \"\"}</td>\n",
" <td>https://api.archives-ouvertes.fr/oai/archivesic</td>\n",
" <td>yes</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>55492.0</td>\n",
" <td>1137498.0</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2022-02-14 13:34:42 +01:00
" <td>93</td>\n",
" <td>{\"name\": \"digitalcommons@the texas medical cen...</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>http://digitalcommons.library.tmc.edu/</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>institutional</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[]</td>\n",
" <td>2022-01-12 15:34:53</td>\n",
" <td>2006-02-14 11:16:12</td>\n",
" <td>[health and medicine]</td>\n",
" <td>[journal_articles, theses_and_dissertations]</td>\n",
" <td>[{'name': 'texas medical center', 'alternative...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"other\", \"version\": \"\"}</td>\n",
" <td>http://digitalcommons.library.tmc.edu/do/oai/</td>\n",
" <td>yes</td>\n",
2022-02-14 13:34:42 +01:00
" <td>NaN</td>\n",
" <td>2658.0</td>\n",
" <td>7268.0</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2022-02-14 13:34:42 +01:00
" <td>68</td>\n",
" <td>{\"name\": \"cognitive sciences eprint archive\", ...</td>\n",
" <td>[{'acronym': 'cogprints'}]</td>\n",
" <td>http://cogprints.org/</td>\n",
" <td>NaN</td>\n",
" <td>disciplinary</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2022-01-12 15:34:53</td>\n",
" <td>2006-01-04 15:01:23</td>\n",
" <td>[humanities, health and medicine, science, soc...</td>\n",
" <td>[journal_articles, conference_and_workshop_pap...</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[{'name': 'university of southampton', 'altern...</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{\"name\": \"eprints\", \"version\": \"\"}</td>\n",
" <td>http://cogprints.org/cgi/oai2</td>\n",
" <td>yes</td>\n",
2022-02-14 13:34:42 +01:00
" <td>NaN</td>\n",
" <td>2895.0</td>\n",
" <td>4277.0</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2022-02-14 13:34:42 +01:00
" <td>84</td>\n",
" <td>{\"name\": \"digital commons@carleton college\", \"...</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>http://digitalcommons.carleton.edu/</td>\n",
" <td>NaN</td>\n",
" <td>institutional</td>\n",
" <td>[]</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2022-01-12 15:34:53</td>\n",
" <td>2006-01-04 16:07:58</td>\n",
" <td>[humanities, science, social sciences]</td>\n",
" <td>[journal_articles, unpub_reports_and_working_p...</td>\n",
" <td>[{'name': 'carleton college', 'alternativeName...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"other\", \"version\": \"\"}</td>\n",
" <td>NaN</td>\n",
" <td>yes</td>\n",
2022-02-14 13:34:42 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>42.0</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" system_metadata.id repository_metadata.name \\\n",
2022-02-14 13:34:42 +01:00
"0 134 {\"name\": \"eldorado - repository of the tu dort... \n",
"1 58 {\"name\": \"archive ouverte en sciences de linfo... \n",
"2 93 {\"name\": \"digitalcommons@the texas medical cen... \n",
"3 68 {\"name\": \"cognitive sciences eprint archive\", ... \n",
"4 84 {\"name\": \"digital commons@carleton college\", \"... \n",
"\n",
" repository_metadata.alternativename \\\n",
"0 [{'name': 'eldorado - ressourcen aus und für l... \n",
"1 [{'acronym': '@rchivesic'}] \n",
"2 [] \n",
"3 [{'acronym': 'cogprints'}] \n",
"4 [] \n",
"\n",
" repository_metadata.url repository_metadata.description \\\n",
"0 https://eldorado.tu-dortmund.de NaN \n",
"1 https://archivesic.ccsd.cnrs.fr NaN \n",
"2 http://digitalcommons.library.tmc.edu/ NaN \n",
"3 http://cogprints.org/ NaN \n",
"4 http://digitalcommons.carleton.edu/ NaN \n",
"\n",
" repository_metadata.type repository_metadata.content_languages \\\n",
"0 institutional [] \n",
"1 institutional [] \n",
"2 institutional [] \n",
"3 disciplinary [] \n",
"4 institutional [] \n",
"\n",
" system_metadata.date_modified system_metadata.date_created \\\n",
"0 2022-01-12 15:34:54 2005-12-19 14:57:52 \n",
"1 2022-01-12 15:34:53 2006-01-13 12:48:32 \n",
"2 2022-01-12 15:34:53 2006-02-14 11:16:12 \n",
"3 2022-01-12 15:34:53 2006-01-04 15:01:23 \n",
"4 2022-01-12 15:34:53 2006-01-04 16:07:58 \n",
"\n",
" repository_metadata.content_subjects \\\n",
2022-02-14 13:34:42 +01:00
"0 [arts, humanities, science, mathematics, socia... \n",
"1 [arts, science, technology, engineering, mathe... \n",
"2 [health and medicine] \n",
"3 [humanities, health and medicine, science, soc... \n",
"4 [humanities, science, social sciences] \n",
"\n",
" repository_metadata.content_types \\\n",
2022-02-14 13:34:42 +01:00
"0 [journal_articles, conference_and_workshop_pap... \n",
"1 [journal_articles, conference_and_workshop_pap... \n",
"2 [journal_articles, theses_and_dissertations] \n",
"3 [journal_articles, conference_and_workshop_pap... \n",
2022-02-14 13:34:42 +01:00
"4 [journal_articles, unpub_reports_and_working_p... \n",
"\n",
" organization policy_urls \\\n",
"0 [{'name': 'technische universität dortmund', '... [] \n",
"1 [{'name': 'centre pour la communication scient... [] \n",
"2 [{'name': 'texas medical center', 'alternative... [] \n",
"3 [{'name': 'university of southampton', 'altern... [] \n",
"4 [{'name': 'carleton college', 'alternativeName... [] \n",
"\n",
" repository_metadata.software \\\n",
"0 {\"name\": \"dspace\", \"version\": \"\"} \n",
"1 {\"name\": \"hal\", \"version\": \"\"} \n",
"2 {\"name\": \"other\", \"version\": \"\"} \n",
"3 {\"name\": \"eprints\", \"version\": \"\"} \n",
"4 {\"name\": \"other\", \"version\": \"\"} \n",
"\n",
" repository_metadata.oai_url \\\n",
"0 https://eldorado.tu-dortmund.de/oai/request \n",
"1 https://api.archives-ouvertes.fr/oai/archivesic \n",
"2 http://digitalcommons.library.tmc.edu/do/oai/ \n",
"3 http://cogprints.org/cgi/oai2 \n",
"4 NaN \n",
"\n",
" system_metadata.publicly_visible repository_metadata.repository_status \\\n",
"0 yes NaN \n",
"1 yes NaN \n",
"2 yes NaN \n",
"3 yes NaN \n",
"4 yes NaN \n",
"\n",
" repository_metadata.fulltext_record_count \\\n",
2022-02-14 13:34:42 +01:00
"0 9629.0 \n",
"1 55492.0 \n",
"2 2658.0 \n",
"3 2895.0 \n",
"4 NaN \n",
"\n",
" repository_metadata.metadata_record_count \n",
2022-02-14 13:34:42 +01:00
"0 20963.0 \n",
"1 1137498.0 \n",
"2 7268.0 \n",
"3 4277.0 \n",
"4 42.0 "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 5,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"opendoar_df = pd.read_csv('../data/raw/openDoar.tsv', delimiter='\\t',\n",
" converters={'repository_metadata.content_subjects': ast.literal_eval,\n",
" 'repository_metadata.alternativename': ast.literal_eval,\n",
" 'repository_metadata.content_types': ast.literal_eval,\n",
" 'organization': ast.literal_eval\n",
2021-07-22 11:03:05 +02:00
" },\n",
" dtype={'system_metadata.id': str})\n",
"\n",
2021-07-22 11:03:05 +02:00
"opendoar_df.head()"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 6,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>system_metadata.id</th>\n",
" <th>repository_metadata.name</th>\n",
" <th>repository_metadata.alternativename</th>\n",
" <th>repository_metadata.url</th>\n",
" <th>repository_metadata.description</th>\n",
" <th>repository_metadata.type</th>\n",
" <th>repository_metadata.content_languages</th>\n",
" <th>system_metadata.date_modified</th>\n",
" <th>system_metadata.date_created</th>\n",
" <th>repository_metadata.content_subjects</th>\n",
" <th>repository_metadata.content_types</th>\n",
" <th>organization</th>\n",
" <th>policy_urls</th>\n",
" <th>repository_metadata.software</th>\n",
" <th>repository_metadata.oai_url</th>\n",
" <th>system_metadata.publicly_visible</th>\n",
" <th>repository_metadata.repository_status</th>\n",
" <th>repository_metadata.fulltext_record_count</th>\n",
" <th>repository_metadata.metadata_record_count</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5810</td>\n",
" <td>0.0</td>\n",
" <td>5810</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>5811</td>\n",
" <td>4447</td>\n",
" <td>5811</td>\n",
" <td>0.0</td>\n",
" <td>2.292000e+03</td>\n",
" <td>4.184000e+03</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5811</td>\n",
" <td>5780</td>\n",
" <td>2116</td>\n",
" <td>5772</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>4</td>\n",
" <td>1</td>\n",
2022-02-14 13:34:42 +01:00
" <td>171</td>\n",
" <td>5643</td>\n",
" <td>237</td>\n",
" <td>477</td>\n",
" <td>5212</td>\n",
" <td>678</td>\n",
" <td>32</td>\n",
" <td>4415</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2022-02-14 13:34:42 +01:00
" <td>134</td>\n",
" <td>{\"name\": \"arch\", \"language\": \"en\"}</td>\n",
" <td>[]</td>\n",
" <td>http://harp.lib.hiroshima-u.ac.jp/</td>\n",
2022-02-14 13:34:42 +01:00
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>institutional</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[]</td>\n",
" <td>2022-01-12 15:35:47</td>\n",
" <td>2020-09-18 12:53:48</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[science, technology, engineering, mathematics...</td>\n",
" <td>[theses_and_dissertations]</td>\n",
" <td>[{'name': 'rijksuniversiteit groningen', 'alte...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>https://api.figshare.com/v2/oai</td>\n",
" <td>yes</td>\n",
2022-02-14 13:34:42 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
2021-07-22 11:03:05 +02:00
" <td>3</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3656</td>\n",
" <td>3</td>\n",
2022-02-14 13:34:42 +01:00
" <td>NaN</td>\n",
" <td>5161</td>\n",
" <td>5811</td>\n",
" <td>73</td>\n",
" <td>81</td>\n",
" <td>3321</td>\n",
" <td>469</td>\n",
2021-07-22 11:03:05 +02:00
" <td>26</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5131</td>\n",
" <td>2273</td>\n",
" <td>3</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5811</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5.022890e+03</td>\n",
" <td>1.765556e+05</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>4.212648e+04</td>\n",
" <td>6.611068e+06</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.000000e+00</td>\n",
" <td>0.000000e+00</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.000000e+00</td>\n",
2022-02-14 13:34:42 +01:00
" <td>8.937500e+02</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>4.225000e+02</td>\n",
" <td>4.012500e+03</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2.931500e+03</td>\n",
" <td>1.629350e+04</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.817531e+06</td>\n",
" <td>4.200000e+08</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2022-02-14 13:34:42 +01:00
" system_metadata.id repository_metadata.name \\\n",
"count 5811 5811 \n",
"unique 5811 5780 \n",
"top 134 {\"name\": \"arch\", \"language\": \"en\"} \n",
"freq 1 3 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" repository_metadata.alternativename \\\n",
2022-02-14 13:34:42 +01:00
"count 5811 \n",
"unique 2116 \n",
"top [] \n",
2022-02-14 13:34:42 +01:00
"freq 3656 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-02-14 13:34:42 +01:00
" repository_metadata.url repository_metadata.description \\\n",
"count 5810 0.0 \n",
"unique 5772 NaN \n",
"top http://harp.lib.hiroshima-u.ac.jp/ NaN \n",
"freq 3 NaN \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" repository_metadata.type repository_metadata.content_languages \\\n",
2022-02-14 13:34:42 +01:00
"count 5810 5811 \n",
"unique 4 1 \n",
"top institutional [] \n",
"freq 5161 5811 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" system_metadata.date_modified system_metadata.date_created \\\n",
2022-02-14 13:34:42 +01:00
"count 5811 5811 \n",
"unique 171 5643 \n",
"top 2022-01-12 15:35:47 2020-09-18 12:53:48 \n",
"freq 73 81 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
2022-02-14 13:34:42 +01:00
" repository_metadata.content_subjects \\\n",
"count 5811 \n",
"unique 237 \n",
"top [science, technology, engineering, mathematics... \n",
"freq 3321 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" repository_metadata.content_types \\\n",
"count 5811 \n",
"unique 477 \n",
"top [theses_and_dissertations] \n",
"freq 469 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" organization policy_urls \\\n",
2022-02-14 13:34:42 +01:00
"count 5811 5811 \n",
"unique 5212 678 \n",
"top [{'name': 'rijksuniversiteit groningen', 'alte... [] \n",
2022-02-14 13:34:42 +01:00
"freq 26 5131 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
2022-02-14 13:34:42 +01:00
" repository_metadata.software repository_metadata.oai_url \\\n",
"count 5811 4447 \n",
"unique 32 4415 \n",
"top {\"name\": \"dspace\", \"version\": \"\"} https://api.figshare.com/v2/oai \n",
"freq 2273 3 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" system_metadata.publicly_visible \\\n",
"count 5811 \n",
"unique 1 \n",
"top yes \n",
"freq 5811 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" repository_metadata.repository_status \\\n",
"count 0.0 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" repository_metadata.fulltext_record_count \\\n",
2022-02-14 13:34:42 +01:00
"count 2.292000e+03 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
2022-02-14 13:34:42 +01:00
"mean 5.022890e+03 \n",
"std 4.212648e+04 \n",
"min 0.000000e+00 \n",
"25% 0.000000e+00 \n",
2022-02-14 13:34:42 +01:00
"50% 4.225000e+02 \n",
"75% 2.931500e+03 \n",
"max 1.817531e+06 \n",
"\n",
" repository_metadata.metadata_record_count \n",
2022-02-14 13:34:42 +01:00
"count 4.184000e+03 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
2022-02-14 13:34:42 +01:00
"mean 1.765556e+05 \n",
"std 6.611068e+06 \n",
"min 0.000000e+00 \n",
2022-02-14 13:34:42 +01:00
"25% 8.937500e+02 \n",
"50% 4.012500e+03 \n",
"75% 1.629350e+04 \n",
"max 4.200000e+08 "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 6,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"opendoar_df.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ROAR**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 7,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
2021-07-22 11:03:05 +02:00
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
2021-07-22 11:03:05 +02:00
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
2021-07-22 11:03:05 +02:00
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>633</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/01</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:07</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>subject</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/</td>\n",
" <td>@RCHIVESIC</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/oai/oai.php</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>2002-05-17 19:24:41</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[669, 58]</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>511</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/10</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:13</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://www.diva-portal.org/mdh/</td>\n",
" <td>Academic Archive On-line (Mälardalen Universit...</td>\n",
" <td>http://www.diva-portal.org/oai/mdh/OAI</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>se</td>\n",
" <td>Uppsala</td>\n",
" <td>59.8667</td>\n",
" <td>17.6333</td>\n",
" <td>diva</td>\n",
" <td>geoname_2_SE</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2005-12-08 13:15:22</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[258, 526]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1000</td>\n",
" <td>274</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/10/00</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>2011-07-06 08:21:21</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://pam.pisharp.org/</td>\n",
" <td>PAM - Portuguese Archive of Mathematics</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>pt</td>\n",
" <td>Bellevue, WA</td>\n",
" <td>47.6034</td>\n",
" <td>-122.155</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_PT</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10001</td>\n",
" <td>20</td>\n",
" <td>archive</td>\n",
" <td>91</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>2016-03-21 19:44:01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/</td>\n",
" <td>Klimawandel Dokumentenserver</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/oai</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The \"Documentenserver Klimawandel\" (Repository...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[Helmholtz-Zentrum Geesthacht, Climate Service...</td>\n",
" <td>[http://www.climateservicecenter.de/, http://w...</td>\n",
" <td>de</td>\n",
" <td>Hamburg</td>\n",
" <td>53.5511</td>\n",
" <td>9.9937</td>\n",
" <td>opus</td>\n",
" <td>geoname_2_DE</td>\n",
" <td>other</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[GE, S1, GF, HD, G1]</td>\n",
" <td>2015-07-02 08:08:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[3408, 5881]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10008</td>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>404</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/08</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>2016-03-21 19:43:51</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/</td>\n",
" <td>Creative Matter | Skidmore College Research</td>\n",
" <td>http://creativematter.skidmore.edu/do/oai/</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/recent.rss</td>\n",
" <td>NaN</td>\n",
" <td>Welcome to Creative Matter, a repository for t...</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>Skidmore College</td>\n",
" <td>http://www.skidmore.edu/</td>\n",
" <td>us</td>\n",
" <td>Saratoga Springs</td>\n",
" <td>43.0961</td>\n",
" <td>-73.7818</td>\n",
" <td>bepress</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2015-07-06 17:35:50</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>5882</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
"0 1 633 archive 1 NaN NaN \n",
"1 10 511 archive 1 NaN NaN \n",
"2 1000 274 archive 1 NaN NaN \n",
"3 10001 20 archive 91 NaN NaN \n",
"4 10008 11 archive 404 NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
"0 disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-18 05:40:07 \n",
"1 disk0/00/00/00/10 2010-01-06 13:43:48 2011-07-18 05:40:13 \n",
"2 disk0/00/00/10/00 2010-01-06 13:45:01 2011-07-06 08:21:21 \n",
"3 disk0/00/01/00/01 2015-08-08 14:52:11 2016-03-21 19:44:01 \n",
"4 disk0/00/01/00/08 2015-08-08 14:52:26 2016-03-21 19:43:51 \n",
"\n",
" status_changed type succeeds commentary \\\n",
"0 2010-01-06 13:43:48 subject NaN NaN \n",
"1 2010-01-06 13:43:48 institutional NaN NaN \n",
"2 2010-01-06 13:45:01 subject NaN NaN \n",
"3 2015-08-08 14:52:11 subject NaN NaN \n",
"4 2015-08-08 14:52:26 institutional NaN NaN \n",
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
"0 show NaN NaN NaN NaN \n",
"1 show NaN NaN NaN NaN \n",
"2 show NaN NaN NaN NaN \n",
"3 show NaN NaN NaN NaN \n",
"4 show NaN NaN NaN NaN \n",
"\n",
" item_issues_id item_issues_type item_issues_description \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"0 NaN NaN 0 \n",
"1 NaN NaN 0 \n",
"2 NaN NaN 0 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" home_page \\\n",
"0 http://archivesic.ccsd.cnrs.fr/ \n",
"1 http://www.diva-portal.org/mdh/ \n",
"2 http://pam.pisharp.org/ \n",
"3 http://edoc.sub.uni-hamburg.de/klimawandel/ \n",
"4 http://creativematter.skidmore.edu/ \n",
"\n",
" title \\\n",
"0 @RCHIVESIC \n",
"1 Academic Archive On-line (Mälardalen Universit... \n",
"2 PAM - Portuguese Archive of Mathematics \n",
"3 Klimawandel Dokumentenserver \n",
"4 Creative Matter | Skidmore College Research \n",
"\n",
" oai_pmh sword_endpoint \\\n",
"0 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n",
"1 http://www.diva-portal.org/oai/mdh/OAI NaN \n",
"2 NaN NaN \n",
"3 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n",
"4 http://creativematter.skidmore.edu/do/oai/ NaN \n",
"\n",
" rss_feed twitter_feed \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 http://creativematter.skidmore.edu/recent.rss NaN \n",
"\n",
" description fulltext open_access \\\n",
"0 NaN NaN NaN \n",
"1 NaN TRUE TRUE \n",
"2 NaN TRUE TRUE \n",
"3 The \"Documentenserver Klimawandel\" (Repository... TRUE TRUE \n",
"4 Welcome to Creative Matter, a repository for t... TRUE FALSE \n",
"\n",
" mandate organisation_title \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
2022-03-17 10:33:11 +01:00
"3 TRUE [Helmholtz-Zentrum Geesthacht, Climate Service... \n",
"4 FALSE Skidmore College \n",
"\n",
" organisation_home_page location_country \\\n",
"0 NaN fr \n",
"1 NaN se \n",
"2 NaN pt \n",
"3 [http://www.climateservicecenter.de/, http://w... de \n",
"4 http://www.skidmore.edu/ us \n",
"\n",
" location_city location_latitude location_longitude software \\\n",
"0 NaN NaN NaN hal \n",
"1 Uppsala 59.8667 17.6333 diva \n",
"2 Bellevue, WA 47.6034 -122.155 dspace \n",
"3 Hamburg 53.5511 9.9937 opus \n",
"4 Saratoga Springs 43.0961 -73.7818 bepress \n",
"\n",
" geoname version subjects date note \\\n",
"0 geoname_2_FR other NaN 2002-05-17 19:24:41 NaN \n",
"1 geoname_2_SE other NaN 2005-12-08 13:15:22 NaN \n",
"2 geoname_2_PT other NaN 2006-05-04 10:48:14 NaN \n",
2022-03-17 10:33:11 +01:00
"3 geoname_2_DE other [GE, S1, GF, HD, G1] 2015-07-02 08:08:31 NaN \n",
"4 geoname_2_US other NaN 2015-07-06 17:35:50 NaN \n",
"\n",
" suggestions activity_low activity_medium activity_high recordcount \\\n",
"0 NaN 0 0 0 25 \n",
"1 NaN 0 0 0 100 \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... NaN \n",
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
"0 NaN NaN NaN [opendoar, celestial] \n",
"1 NaN NaN NaN [opendoar, celestial] \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN [opendoar, celestial] \n",
"4 NaN NaN NaN celestial \n",
"\n",
" registry_id submit_to submitted_to_name submitted_to_done \\\n",
2022-03-17 10:33:11 +01:00
"0 [669, 58] NaN NaN NaN \n",
"1 [258, 526] NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
2022-03-17 10:33:11 +01:00
"3 [3408, 5881] NaN NaN NaN \n",
"4 5882 NaN NaN NaN \n",
"\n",
" webometrics_rank webometrics_size webometrics_visibility \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" webometrics_rich_files webometrics_scholar monthly_deposits total_deposits \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" association \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 7,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the raw ROAR CSV export with every column as a string, then collapse\n",
"# rows that share an eprintid: each cell becomes the set of values seen.\n",
"roar_df = (\n",
"    pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n",
"    .groupby('eprintid')\n",
"    .aggregate(set)\n",
")\n",
"\n",
"def value_or_list(cell_set):\n",
"    \"\"\"Collapse a collection of cell values into NaN, one value, or a list.\n",
"\n",
"    Missing values are dropped first. Returns ``np.nan`` when nothing is\n",
"    left, the value itself when exactly one distinct value is left, and\n",
"    otherwise a list of the distinct values ordered by their string form,\n",
"    so that repeated runs produce the same output.\n",
"    \"\"\"\n",
"    # pd.isna recognises every missing marker (None, any float NaN, pd.NA);\n",
"    # set.discard(np.nan) only removes the np.nan object itself, because a\n",
"    # NaN never compares equal to a different NaN object.\n",
"    values = {value for value in cell_set if not pd.isna(value)}\n",
"    if not values:\n",
"        return np.nan\n",
"    if len(values) == 1:\n",
"        return values.pop()\n",
"    # Set iteration order is arbitrary, so sort for a reproducible list.\n",
"    return sorted(values, key=str)\n",
" \n",
"# Reduce every aggregated set with value_or_list and turn the eprintid\n",
"# group key back into an ordinary column.\n",
"roar_df = roar_df.applymap(value_or_list).reset_index()\n",
"\n",
"# Preview the first rows of the collapsed table.\n",
"roar_df.head()"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 8,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
2021-07-22 11:03:05 +02:00
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>108</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2242</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>268</td>\n",
" <td>5437</td>\n",
" <td>5442</td>\n",
" <td>4332</td>\n",
" <td>178</td>\n",
" <td>1538</td>\n",
" <td>116</td>\n",
" <td>3837</td>\n",
" <td>4197</td>\n",
" <td>4197</td>\n",
" <td>3746</td>\n",
" <td>4460</td>\n",
" <td>4286</td>\n",
" <td>5138</td>\n",
" <td>3714</td>\n",
" <td>3725</td>\n",
" <td>3708</td>\n",
" <td>4700</td>\n",
" <td>4730</td>\n",
" <td>5444</td>\n",
" <td>1289</td>\n",
" <td>5429</td>\n",
" <td>218</td>\n",
" <td>189</td>\n",
" <td>2288</td>\n",
" <td>2288</td>\n",
" <td>2288</td>\n",
" <td>2290</td>\n",
" <td>2288</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
2022-02-14 13:34:42 +01:00
" <td>4605</td>\n",
" <td>4580</td>\n",
" <td>375</td>\n",
" <td>205</td>\n",
" <td>205</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>756</td>\n",
" <td>756</td>\n",
2022-02-14 13:34:42 +01:00
" <td>223</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>660</td>\n",
" <td>1</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2189</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>4198</td>\n",
" <td>4043</td>\n",
" <td>4230</td>\n",
" <td>12</td>\n",
2022-02-14 13:34:42 +01:00
" <td>108</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>48</td>\n",
" <td>5</td>\n",
" <td>62</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5271</td>\n",
" <td>5143</td>\n",
" <td>4059</td>\n",
" <td>172</td>\n",
" <td>1485</td>\n",
" <td>112</td>\n",
" <td>3359</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3858</td>\n",
" <td>3831</td>\n",
" <td>144</td>\n",
" <td>1884</td>\n",
" <td>2923</td>\n",
" <td>2953</td>\n",
" <td>31</td>\n",
" <td>126</td>\n",
" <td>53</td>\n",
2022-02-14 13:34:42 +01:00
" <td>938</td>\n",
" <td>4898</td>\n",
" <td>210</td>\n",
" <td>173</td>\n",
" <td>72</td>\n",
" <td>54</td>\n",
" <td>16</td>\n",
" <td>741</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1702</td>\n",
" <td>135</td>\n",
" <td>118</td>\n",
" <td>134</td>\n",
" <td>117</td>\n",
2022-02-14 13:34:42 +01:00
" <td>7</td>\n",
2022-03-17 10:33:11 +01:00
" <td>4261</td>\n",
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>146</td>\n",
" <td>143</td>\n",
" <td>346</td>\n",
" <td>342</td>\n",
" <td>3</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>1</td>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/01</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-06 08:24:53</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
" <td>10164</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>bad_oai_pmh_url_0</td>\n",
" <td>duplicate_title</td>\n",
" <td>Duplicate title to &lt;xhtml:table xmlns:xhtml=\"h...</td>\n",
" <td>2010-01-13 10:44:49</td>\n",
" <td>discovered</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FALSE</td>\n",
" <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>http://kce.docressources.info/ws/PMBWs_2</td>\n",
" <td>http://producao.usp.br/sword/servicedocument</td>\n",
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
" <td>http://my.indexcopernicus.com/fredemoreno</td>\n",
" <td>info:other:archives.eprints.org:import</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>Chinese Academy of Science (中国科学院)</td>\n",
" <td>http://www.cas.cn/</td>\n",
" <td>us</td>\n",
" <td>Lima</td>\n",
" <td>34.1607</td>\n",
" <td>-118.139</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>K1</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
2022-02-14 13:34:42 +01:00
" <td>¿Quién puede depositar documentos en el reposi...</td>\n",
" <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>2479</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[opendoar, roarmap, celestial]</td>\n",
" <td>opendoar</td>\n",
" <td>2021-01-25</td>\n",
" <td>24</td>\n",
" <td>46</td>\n",
" <td>20</td>\n",
" <td>824</td>\n",
" <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>russell_group</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
2022-02-14 13:34:42 +01:00
" <td>333</td>\n",
" <td>5444</td>\n",
" <td>1330</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>16</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3853</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5402</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>15</td>\n",
" <td>33</td>\n",
" <td>2</td>\n",
" <td>45</td>\n",
" <td>38</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2201</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>261</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>112</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2805</td>\n",
" <td>2696</td>\n",
" <td>2748</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>891</td>\n",
" <td>74</td>\n",
" <td>25</td>\n",
" <td>25</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2341</td>\n",
" <td>845</td>\n",
" <td>4841</td>\n",
" <td>53</td>\n",
" <td>99</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2012</td>\n",
" <td>2074</td>\n",
" <td>2210</td>\n",
" <td>730</td>\n",
" <td>95</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2106</td>\n",
" <td>4</td>\n",
2022-02-14 13:34:42 +01:00
" <td>119</td>\n",
" <td>205</td>\n",
" <td>205</td>\n",
" <td>1</td>\n",
2021-07-22 11:03:05 +02:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>387</td>\n",
" <td>387</td>\n",
2022-02-14 13:34:42 +01:00
" <td>130</td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 5444 5444 0.0 0.0 \n",
"unique 5444 660 1 2189 NaN NaN \n",
"top 1 11 archive 1 NaN NaN \n",
2022-02-14 13:34:42 +01:00
"freq 1 333 5444 1330 NaN NaN \n",
"mean NaN NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 5444 \n",
"unique 5444 4198 4043 \n",
"top disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-06 08:24:53 \n",
"freq 1 16 8 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" status_changed type succeeds commentary \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 108 0.0 \n",
"unique 4230 12 108 NaN \n",
"top 2010-01-06 13:43:48 institutional 10164 NaN \n",
2022-02-14 13:34:42 +01:00
"freq 16 3853 1 NaN \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 0.0 0.0 0.0 0.0 \n",
"unique 2 NaN NaN NaN NaN \n",
"top show NaN NaN NaN NaN \n",
2022-02-14 13:34:42 +01:00
"freq 5402 NaN NaN NaN NaN \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" item_issues_id item_issues_type \\\n",
"count 63 63 \n",
"unique 48 5 \n",
"top bad_oai_pmh_url_0 duplicate_title \n",
"freq 15 33 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" item_issues_description \\\n",
"count 63 \n",
"unique 62 \n",
"top Duplicate title to <xhtml:table xmlns:xhtml=\"h... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"count 63 63 0.0 \n",
"unique 4 3 NaN \n",
"top 2010-01-13 10:44:49 discovered NaN \n",
"freq 45 38 NaN \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
2022-02-14 13:34:42 +01:00
"count 0.0 0.0 2242 \n",
"unique NaN NaN 4 \n",
"top NaN NaN 0 \n",
2022-02-14 13:34:42 +01:00
"freq NaN NaN 2201 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
2022-02-14 13:34:42 +01:00
"count 0.0 0.0 268 5437 \n",
"unique NaN NaN 2 5271 \n",
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
2022-02-14 13:34:42 +01:00
"freq NaN NaN 261 4 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" title oai_pmh \\\n",
2022-02-14 13:34:42 +01:00
"count 5442 4332 \n",
"unique 5143 4059 \n",
"top Repositorio Institucional http://kce.docressources.info/ws/PMBWs_2 \n",
"freq 7 4 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" sword_endpoint \\\n",
2022-02-14 13:34:42 +01:00
"count 178 \n",
"unique 172 \n",
"top http://producao.usp.br/sword/servicedocument \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" rss_feed \\\n",
2022-02-14 13:34:42 +01:00
"count 1538 \n",
"unique 1485 \n",
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
"freq 5 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" twitter_feed \\\n",
2022-02-14 13:34:42 +01:00
"count 116 \n",
"unique 112 \n",
"top http://my.indexcopernicus.com/fredemoreno \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" description fulltext open_access mandate \\\n",
2022-02-14 13:34:42 +01:00
"count 3837 4197 4197 3746 \n",
"unique 3359 2 2 2 \n",
"top info:other:archives.eprints.org:import TRUE TRUE FALSE \n",
2022-02-14 13:34:42 +01:00
"freq 112 2805 2696 2748 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" organisation_title organisation_home_page \\\n",
2022-02-14 13:34:42 +01:00
"count 4460 4286 \n",
"unique 3858 3831 \n",
"top Chinese Academy of Science (中国科学院) http://www.cas.cn/ \n",
"freq 9 9 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" location_country location_city location_latitude location_longitude \\\n",
2022-02-14 13:34:42 +01:00
"count 5138 3714 3725 3708 \n",
"unique 144 1884 2923 2953 \n",
"top us Lima 34.1607 -118.139 \n",
2022-02-14 13:34:42 +01:00
"freq 891 74 25 25 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" software geoname version subjects date \\\n",
2022-02-14 13:34:42 +01:00
"count 4700 4730 5444 1289 5429 \n",
"unique 31 126 53 938 4898 \n",
"top dspace geoname_2_US other K1 2006-05-04 10:48:14 \n",
2022-02-14 13:34:42 +01:00
"freq 2341 845 4841 53 99 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" note \\\n",
2022-02-14 13:34:42 +01:00
"count 218 \n",
"unique 210 \n",
"top ¿Quién puede depositar documentos en el reposi... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" suggestions activity_low \\\n",
2022-02-14 13:34:42 +01:00
"count 189 2288 \n",
"unique 173 72 \n",
"top This repository is hosted by the Texas Digital... 0 \n",
2022-02-14 13:34:42 +01:00
"freq 9 2012 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" activity_medium activity_high recordcount \\\n",
2022-02-14 13:34:42 +01:00
"count 2288 2288 2290 \n",
"unique 54 16 741 \n",
"top 0 0 100 \n",
2022-02-14 13:34:42 +01:00
"freq 2074 2210 730 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
2022-02-14 13:34:42 +01:00
"count 2288 270 \n",
"unique 1702 135 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 95 113 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
2022-02-14 13:34:42 +01:00
"count 258 270 258 4605 \n",
"unique 118 134 117 7 \n",
"top 0 0 0 [opendoar, celestial] \n",
2022-02-14 13:34:42 +01:00
"freq 114 113 114 2106 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" registry_id submit_to submitted_to_name \\\n",
2022-02-14 13:34:42 +01:00
"count 4580 375 205 \n",
2022-03-17 10:33:11 +01:00
"unique 4261 7 1 \n",
"top 2479 [opendoar, roarmap, celestial] opendoar \n",
2022-02-14 13:34:42 +01:00
"freq 4 119 205 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" submitted_to_done webometrics_rank webometrics_size \\\n",
"count 205 148 148 \n",
"unique 1 148 148 \n",
"top 2021-01-25 24 46 \n",
"freq 205 1 1 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"count 148 148 148 \n",
"unique 148 146 143 \n",
"top 20 824 806 \n",
"freq 1 3 5 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" monthly_deposits total_deposits \\\n",
"count 756 756 \n",
"unique 346 342 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 387 387 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" association \n",
2022-02-14 13:34:42 +01:00
"count 223 \n",
"unique 3 \n",
"top russell_group \n",
2022-02-14 13:34:42 +01:00
"freq 130 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN "
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**FAIRsharing**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.abbreviation</th>\n",
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.cross-references</th>\n",
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.url-for-logo</th>\n",
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.data-access-condition.type</th>\n",
" <th>attributes.metadata.data-contact-information</th>\n",
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.data-access-condition.url</th>\n",
" <th>attributes.metadata.resource-sustainability.url</th>\n",
" <th>attributes.metadata.resource-sustainability.name</th>\n",
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
" <th>attributes.metadata.data-versioning</th>\n",
" <th>attributes.metadata.data-curation.type</th>\n",
" <th>attributes.metadata.data-curation.url</th>\n",
" <th>attributes.metadata.citation-to-related-publications</th>\n",
" <th>attributes.metadata.tombstone</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2022-03-17 10:33:11 +01:00
" <td>3226</td>\n",
" <td>fairsharing-records</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2020-12-09T11:53:44.000Z</td>\n",
" <td>2022-02-08T10:42:36.452Z</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>WDC Sunspot Index and Long-term Solar Observat...</td>\n",
" <td>ready</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'contact-name': 'Frédéric Clette', 'contact-...</td>\n",
" <td>http://sidc.be/silso/home</td>\n",
" <td>3226</td>\n",
" <td>The WDC-SILSO is an activity of the Operationa...</td>\n",
" <td>WDC-SILSO</td>\n",
" <td>[{'url': 'http://www.sidc.be/silso/taxonomy/te...</td>\n",
" <td>2013.0</td>\n",
" <td>[{'url': 'http://www.sidc.be/silso/datafiles',...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-001740, bsg-d001740]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[Electromagnetism, Astrophysics and Astronomy,...</td>\n",
" <td>[Climate, Observation design]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[Climate change, earth observation, Electromag...</td>\n",
" <td>[Belgium]</td>\n",
" <td>FAIRsharing record for: WDC Sunspot Index and ...</td>\n",
" <td>WDC-SILSO</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The WDC-SIL...</td>\n",
" <td>[]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'licence-name': 'SILSO legal notices', 'lice...</td>\n",
" <td>None</td>\n",
2022-03-17 10:33:11 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2022-03-17 10:33:11 +01:00
" <td>2114</td>\n",
" <td>fairsharing-records</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2022-01-21T14:39:02.195Z</td>\n",
" <td>10.25504/FAIRsharing.p06nme</td>\n",
" <td>Biological Magnetic Resonance Data Bank</td>\n",
" <td>ready</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'contact-name': 'Helpdesk', 'contact-email':...</td>\n",
" <td>https://bmrb.io/</td>\n",
" <td>2114</td>\n",
" <td>BMRB collects, annotates, archives, and dissem...</td>\n",
" <td>BMRB</td>\n",
" <td>[{'url': 'https://bmrb.io/bmrb/news/', 'name':...</td>\n",
" <td>1988.0</td>\n",
" <td>[{'url': 'https://bmrb.io/data_library/rsync.s...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-000584, bsg-d000584]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[Structural Biology]</td>\n",
" <td>[Molecular structure, Protein structure, Pepti...</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>FAIRsharing record for: Biological Magnetic Re...</td>\n",
" <td>BMRB</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.p...</td>\n",
" <td>10.25504/FAIRsharing.p06nme</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>This FAIRsharing record describes: BMRB collec...</td>\n",
" <td>[{'id': 552, 'pubmed_id': 18288446, 'title': '...</td>\n",
" <td>[{'licence-name': 'wwPDB Privacy and Usage Pol...</td>\n",
" <td>None</td>\n",
" <td>[{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17...</td>\n",
" <td>[{'url': 'https://bmrb.io/validate/', 'name': ...</td>\n",
" <td></td>\n",
" <td>open</td>\n",
" <td>yes</td>\n",
" <td>https://bmrb.io/deposit/</td>\n",
" <td>open</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2022-03-17 10:33:11 +01:00
" <td>3022</td>\n",
" <td>fairsharing-records</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2020-06-17T10:25:30.000Z</td>\n",
" <td>2022-02-08T10:41:04.073Z</td>\n",
" <td>10.25504/FAIRsharing.8b7a2f</td>\n",
" <td>Fisheries and Oceans Canada Pacific Region Dat...</td>\n",
" <td>ready</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'contact-name': 'Peter Chandler', 'contact-e...</td>\n",
" <td>http://www.pac.dfo-mpo.gc.ca/science/oceans/da...</td>\n",
" <td>3022</td>\n",
" <td>The Institute of Ocean Sciences (IOS)/Ocean Sc...</td>\n",
" <td>None</td>\n",
" <td>[{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P...</td>\n",
" <td>NaN</td>\n",
" <td>[{'name': 'Users must contact the Senior Analy...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-001530, bsg-d001530]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[Environmental Science, Meteorology, Earth Sci...</td>\n",
" <td>[Climate]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[Salinity, Temperature]</td>\n",
" <td>[Canada]</td>\n",
" <td>FAIRsharing record for: Fisheries and Oceans C...</td>\n",
" <td>None</td>\n",
2022-03-17 10:33:11 +01:00
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
" <td>10.25504/FAIRsharing.8b7a2f</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>This FAIRsharing record describes: The Institu...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'Fisheries and Oceans Canada...</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2022-03-17 10:33:11 +01:00
" <td>2998</td>\n",
" <td>fairsharing-records</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2020-05-21T07:42:30.000Z</td>\n",
" <td>2022-02-08T10:40:19.531Z</td>\n",
" <td>10.25504/FAIRsharing.e08886</td>\n",
" <td>Climate Prediction Center</td>\n",
" <td>ready</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'contact-name': 'Jon Hoopingarner', 'contact...</td>\n",
" <td>https://www.cpc.ncep.noaa.gov/</td>\n",
" <td>2998</td>\n",
" <td>The Climate Prediction Center (CPC) produces o...</td>\n",
" <td>CPC</td>\n",
" <td>[{'url': 'https://www.cpc.ncep.noaa.gov/commen...</td>\n",
" <td>1970.0</td>\n",
" <td>[{'url': 'https://www.cpc.ncep.noaa.gov/', 'na...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[biodbcore-001504, bsg-d001504]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[Hydrogeology, Geography, Meteorology, Geodesy...</td>\n",
" <td>[Climate]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[Forecasting, weather]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Climate Prediction Center</td>\n",
" <td>CPC</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.e...</td>\n",
" <td>10.25504/FAIRsharing.e08886</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>This FAIRsharing record describes: The Climate...</td>\n",
" <td>[]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'licence-name': 'National Weather Service Di...</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2022-03-17 10:33:11 +01:00
" <td>2301</td>\n",
" <td>fairsharing-records</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2016-06-03T14:54:08.000Z</td>\n",
" <td>2021-11-24T13:17:51.201Z</td>\n",
" <td>10.25504/FAIRsharing.meh9wz</td>\n",
" <td>Acytostelium Gene Database</td>\n",
" <td>deprecated</td>\n",
" <td>[{'contact-name': 'Acytostelium genome consort...</td>\n",
" <td>http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b...</td>\n",
" <td>2301</td>\n",
" <td>Genome and transcriptome database of Acytostel...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2008.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[biodbcore-000775, bsg-d000775]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[Genomics, Life Science, Transcriptomics]</td>\n",
" <td>[DNA sequence data, Gene model annotation]</td>\n",
" <td>[Acytostelium subglobosum]</td>\n",
" <td>[]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[United Kingdom, Japan]</td>\n",
" <td>FAIRsharing record for: Acytostelium Gene Data...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.m...</td>\n",
" <td>10.25504/FAIRsharing.meh9wz</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>This FAIRsharing record describes: Genome and ...</td>\n",
" <td>[{'id': 1139, 'pubmed_id': 25758444, 'title': ...</td>\n",
" <td>[]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>This resource is no longer available at the st...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2021-9-17</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type attributes.created-at \\\n",
2022-03-17 10:33:11 +01:00
"0 3226 fairsharing-records 2020-12-09T11:53:44.000Z \n",
"1 2114 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"2 3022 fairsharing-records 2020-06-17T10:25:30.000Z \n",
"3 2998 fairsharing-records 2020-05-21T07:42:30.000Z \n",
"4 2301 fairsharing-records 2016-06-03T14:54:08.000Z \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
2022-03-17 10:33:11 +01:00
"0 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
"1 2022-01-21T14:39:02.195Z 10.25504/FAIRsharing.p06nme \n",
"2 2022-02-08T10:41:04.073Z 10.25504/FAIRsharing.8b7a2f \n",
"3 2022-02-08T10:40:19.531Z 10.25504/FAIRsharing.e08886 \n",
"4 2021-11-24T13:17:51.201Z 10.25504/FAIRsharing.meh9wz \n",
"\n",
" attributes.metadata.name \\\n",
"0 WDC Sunspot Index and Long-term Solar Observat... \n",
"1 Biological Magnetic Resonance Data Bank \n",
"2 Fisheries and Oceans Canada Pacific Region Dat... \n",
"3 Climate Prediction Center \n",
"4 Acytostelium Gene Database \n",
"\n",
" attributes.metadata.status \\\n",
"0 ready \n",
"1 ready \n",
"2 ready \n",
"3 ready \n",
"4 deprecated \n",
"\n",
" attributes.metadata.contacts \\\n",
2022-03-17 10:33:11 +01:00
"0 [{'contact-name': 'Frédéric Clette', 'contact-... \n",
"1 [{'contact-name': 'Helpdesk', 'contact-email':... \n",
"2 [{'contact-name': 'Peter Chandler', 'contact-e... \n",
"3 [{'contact-name': 'Jon Hoopingarner', 'contact... \n",
"4 [{'contact-name': 'Acytostelium genome consort... \n",
"\n",
" attributes.metadata.homepage \\\n",
"0 http://sidc.be/silso/home \n",
"1 https://bmrb.io/ \n",
"2 http://www.pac.dfo-mpo.gc.ca/science/oceans/da... \n",
"3 https://www.cpc.ncep.noaa.gov/ \n",
"4 http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b... \n",
"\n",
" attributes.metadata.identifier \\\n",
2022-03-17 10:33:11 +01:00
"0 3226 \n",
"1 2114 \n",
"2 3022 \n",
"3 2998 \n",
"4 2301 \n",
"\n",
" attributes.metadata.description \\\n",
2022-03-17 10:33:11 +01:00
"0 The WDC-SILSO is an activity of the Operationa... \n",
"1 BMRB collects, annotates, archives, and dissem... \n",
"2 The Institute of Ocean Sciences (IOS)/Ocean Sc... \n",
"3 The Climate Prediction Center (CPC) produces o... \n",
"4 Genome and transcriptome database of Acytostel... \n",
"\n",
" attributes.metadata.abbreviation \\\n",
"0 WDC-SILSO \n",
"1 BMRB \n",
"2 None \n",
"3 CPC \n",
"4 NaN \n",
"\n",
" attributes.metadata.support-links \\\n",
2022-03-17 10:33:11 +01:00
"0 [{'url': 'http://www.sidc.be/silso/taxonomy/te... \n",
"1 [{'url': 'https://bmrb.io/bmrb/news/', 'name':... \n",
"2 [{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P... \n",
"3 [{'url': 'https://www.cpc.ncep.noaa.gov/commen... \n",
"4 NaN \n",
"\n",
" attributes.metadata.year-creation \\\n",
2022-03-17 10:33:11 +01:00
"0 2013.0 \n",
"1 1988.0 \n",
"2 NaN \n",
"3 1970.0 \n",
"4 2008.0 \n",
"\n",
" attributes.metadata.data-processes \\\n",
2022-03-17 10:33:11 +01:00
"0 [{'url': 'http://www.sidc.be/silso/datafiles',... \n",
"1 [{'url': 'https://bmrb.io/data_library/rsync.s... \n",
"2 [{'name': 'Users must contact the Senior Analy... \n",
"3 [{'url': 'https://www.cpc.ncep.noaa.gov/', 'na... \n",
"4 NaN \n",
"\n",
" attributes.metadata.cross-references \\\n",
"0 [{'url': 'https://www.re3data.org/repository/r... \n",
"1 [{'url': 'https://www.re3data.org/repository/r... \n",
"2 [{'url': 'https://www.re3data.org/repository/r... \n",
"3 [{'url': 'https://www.re3data.org/repository/r... \n",
"4 NaN \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
2022-03-17 10:33:11 +01:00
"0 [biodbcore-001740, bsg-d001740] Database \n",
"1 [biodbcore-000584, bsg-d000584] Database \n",
"2 [biodbcore-001530, bsg-d001530] Database \n",
"3 [biodbcore-001504, bsg-d001504] Database \n",
"4 [biodbcore-000775, bsg-d000775] Database \n",
"\n",
" attributes.record-type attributes.subjects \\\n",
"0 repository [Electromagnetism, Astrophysics and Astronomy,... \n",
"1 repository [Structural Biology] \n",
"2 repository [Environmental Science, Meteorology, Earth Sci... \n",
"3 repository [Hydrogeology, Geography, Meteorology, Geodesy... \n",
"4 repository [Genomics, Life Science, Transcriptomics] \n",
"\n",
" attributes.domains \\\n",
2022-03-17 10:33:11 +01:00
"0 [Climate, Observation design] \n",
"1 [Molecular structure, Protein structure, Pepti... \n",
"2 [Climate] \n",
"3 [Climate] \n",
"4 [DNA sequence data, Gene model annotation] \n",
"\n",
" attributes.taxonomies \\\n",
"0 [Not applicable] \n",
"1 [All] \n",
"2 [Not applicable] \n",
"3 [Not applicable] \n",
"4 [Acytostelium subglobosum] \n",
"\n",
" attributes.user-defined-tags attributes.countries \\\n",
"0 [Climate change, earth observation, Electromag... [Belgium] \n",
"1 [] [United States] \n",
"2 [Salinity, Temperature] [Canada] \n",
"3 [Forecasting, weather] [United States] \n",
"4 [] [United Kingdom, Japan] \n",
"\n",
" attributes.name attributes.abbreviation \\\n",
2022-03-17 10:33:11 +01:00
"0 FAIRsharing record for: WDC Sunspot Index and ... WDC-SILSO \n",
"1 FAIRsharing record for: Biological Magnetic Re... BMRB \n",
"2 FAIRsharing record for: Fisheries and Oceans C... None \n",
"3 FAIRsharing record for: Climate Prediction Center CPC \n",
"4 FAIRsharing record for: Acytostelium Gene Data... None \n",
"\n",
" attributes.url \\\n",
2022-03-17 10:33:11 +01:00
"0 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
"1 https://fairsharing.org/10.25504/FAIRsharing.p... \n",
"2 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
"3 https://fairsharing.org/10.25504/FAIRsharing.e... \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.m... \n",
"\n",
" attributes.doi \\\n",
2022-03-17 10:33:11 +01:00
"0 10.25504/FAIRsharing.d6423b \n",
"1 10.25504/FAIRsharing.p06nme \n",
"2 10.25504/FAIRsharing.8b7a2f \n",
"3 10.25504/FAIRsharing.e08886 \n",
"4 10.25504/FAIRsharing.meh9wz \n",
"\n",
" attributes.fairsharing-licence \\\n",
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
"1 https://creativecommons.org/licenses/by-sa/4.0... \n",
"2 https://creativecommons.org/licenses/by-sa/4.0... \n",
"3 https://creativecommons.org/licenses/by-sa/4.0... \n",
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
"\n",
" attributes.description \\\n",
2022-03-17 10:33:11 +01:00
"0 This FAIRsharing record describes: The WDC-SIL... \n",
"1 This FAIRsharing record describes: BMRB collec... \n",
"2 This FAIRsharing record describes: The Institu... \n",
"3 This FAIRsharing record describes: The Climate... \n",
"4 This FAIRsharing record describes: Genome and ... \n",
"\n",
" attributes.publications \\\n",
2022-03-17 10:33:11 +01:00
"0 [] \n",
"1 [{'id': 552, 'pubmed_id': 18288446, 'title': '... \n",
"2 [] \n",
"3 [] \n",
2022-03-17 10:33:11 +01:00
"4 [{'id': 1139, 'pubmed_id': 25758444, 'title': ... \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.licence-links attributes.url-for-logo \\\n",
"0 [{'licence-name': 'SILSO legal notices', 'lice... None \n",
"1 [{'licence-name': 'wwPDB Privacy and Usage Pol... None \n",
"2 [{'licence-name': 'Fisheries and Oceans Canada... None \n",
"3 [{'licence-name': 'National Weather Service Di... None \n",
"4 [] None \n",
"\n",
" attributes.metadata.citations \\\n",
"0 NaN \n",
2022-03-17 10:33:11 +01:00
"1 [{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17... \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.associated-tools \\\n",
"0 NaN \n",
2022-03-17 10:33:11 +01:00
"1 [{'url': 'https://bmrb.io/validate/', 'name': ... \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.deprecation-reason \\\n",
"0 NaN \n",
"1 \n",
"2 NaN \n",
"3 NaN \n",
"4 This resource is no longer available at the st... \n",
"\n",
" attributes.metadata.data-access-condition.type \\\n",
"0 NaN \n",
"1 open \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-contact-information \\\n",
"0 NaN \n",
"1 yes \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.url \\\n",
"0 NaN \n",
"1 https://bmrb.io/deposit/ \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.type \\\n",
"0 NaN \n",
2022-03-17 10:33:11 +01:00
"1 open \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.deprecation-date attributes.metadata.access-points \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 2021-9-17 NaN \n",
"\n",
" attributes.metadata.data-access-condition.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.resource-sustainability.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.resource-sustainability.name \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.name \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-access-for-pre-publication-review \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.data-versioning attributes.metadata.data-curation.type \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" attributes.metadata.data-curation.url \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.citation-to-related-publications \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN "
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2022-03-17 10:33:11 +01:00
"with open('../data/raw/fairsharing_dump_api_02_2022.json') as f:\n",
" lines = f.read().splitlines()\n",
" \n",
"fairsharing_df = pd.DataFrame(lines)\n",
"fairsharing_df.columns = ['json_element']\n",
"fairsharing_df['json_element'].apply(json.loads)\n",
"fairsharing_df = pd.json_normalize(fairsharing_df['json_element'].apply(json.loads))\n",
"\n",
"fairsharing_df.head()"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.abbreviation</th>\n",
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.cross-references</th>\n",
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.url-for-logo</th>\n",
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.data-access-condition.type</th>\n",
" <th>attributes.metadata.data-contact-information</th>\n",
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.data-access-condition.url</th>\n",
" <th>attributes.metadata.resource-sustainability.url</th>\n",
" <th>attributes.metadata.resource-sustainability.name</th>\n",
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
" <th>attributes.metadata.data-versioning</th>\n",
" <th>attributes.metadata.data-curation.type</th>\n",
" <th>attributes.metadata.data-curation.url</th>\n",
" <th>attributes.metadata.citation-to-related-publications</th>\n",
" <th>attributes.metadata.tombstone</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-03-17 10:33:11 +01:00
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1764</td>\n",
" <td>1853</td>\n",
" <td>1853.000000</td>\n",
" <td>1853</td>\n",
" <td>1671</td>\n",
" <td>1663</td>\n",
" <td>1541.000000</td>\n",
" <td>1626</td>\n",
" <td>790</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1671</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>1853</td>\n",
" <td>18</td>\n",
" <td>621</td>\n",
" <td>632</td>\n",
" <td>363</td>\n",
" <td>42</td>\n",
" <td>47</td>\n",
" <td>22</td>\n",
" <td>33</td>\n",
" <td>238</td>\n",
" <td>465</td>\n",
" <td>19</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>17</td>\n",
" <td>22</td>\n",
" <td>8</td>\n",
" <td>35</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-03-17 10:33:11 +01:00
" <td>1853</td>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1218</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1851</td>\n",
" <td>4</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1623</td>\n",
" <td>1853</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1853</td>\n",
" <td>1655</td>\n",
" <td>1646</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1625</td>\n",
" <td>790</td>\n",
" <td>1799</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
2022-03-17 10:33:11 +01:00
" <td>935</td>\n",
" <td>1205</td>\n",
" <td>385</td>\n",
" <td>395</td>\n",
" <td>194</td>\n",
" <td>1851</td>\n",
" <td>1655</td>\n",
" <td>1853</td>\n",
" <td>1601</td>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1853</td>\n",
" <td>1135</td>\n",
" <td>1119</td>\n",
" <td>18</td>\n",
" <td>331</td>\n",
" <td>627</td>\n",
" <td>104</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>22</td>\n",
" <td>2</td>\n",
" <td>71</td>\n",
" <td>460</td>\n",
" <td>19</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2022-03-17 10:33:11 +01:00
" <td>3226</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2022-02-08T10:42:36.452Z</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>iDog</td>\n",
" <td>ready</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[]</td>\n",
" <td>http://sidc.be/silso/home</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>The WDC-SILSO is an activity of the Operationa...</td>\n",
" <td>CGD</td>\n",
" <td>[{'url': 'https://github.com/gbif/ipt/wiki/IPT...</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'url': 'https://site.uit.no/dataverseno/abou...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Life Science]</td>\n",
" <td>[]</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>FAIRsharing record for: iDog</td>\n",
" <td>CGD</td>\n",
2022-03-17 10:33:11 +01:00
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
" <td>10.25504/FAIRsharing.d6423b</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>This FAIRsharing record describes: The WDC-SIL...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
2022-03-17 10:33:11 +01:00
" <td>/rails/active_storage/blobs/redirect/eyJfcmFpb...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td>open</td>\n",
" <td>yes</td>\n",
" <td>https://bmrb.io/deposit/</td>\n",
" <td>controlled</td>\n",
" <td>2021-9-17</td>\n",
2022-03-17 10:33:11 +01:00
" <td>[{'url': 'https://heidata.uni-heidelberg.de/oa...</td>\n",
" <td>https://arch.library.northwestern.edu/about?lo...</td>\n",
" <td>https://www.library.northwestern.edu/about/adm...</td>\n",
" <td>Commitment to Sustainability: Level 1</td>\n",
" <td>http://www.library.northwestern.edu/about/admi...</td>\n",
" <td>Digital Preservation Policy: Level 1</td>\n",
" <td>yes</td>\n",
" <td>yes</td>\n",
" <td>manual</td>\n",
" <td>https://www.gbif.org/tools/data-validator/about</td>\n",
" <td>yes</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1853</td>\n",
" <td>636</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1564</td>\n",
" <td>40</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>3</td>\n",
" <td>6</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>55</td>\n",
" <td>1853</td>\n",
" <td>954</td>\n",
" <td>345</td>\n",
" <td>276</td>\n",
" <td>528</td>\n",
" <td>1258</td>\n",
" <td>607</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1853</td>\n",
" <td>1</td>\n",
2022-03-17 10:33:11 +01:00
" <td>690</td>\n",
" <td>735</td>\n",
" <td>1</td>\n",
" <td>285</td>\n",
" <td>3</td>\n",
2022-03-17 10:33:11 +01:00
" <td>125</td>\n",
" <td>38</td>\n",
" <td>45</td>\n",
" <td>1</td>\n",
" <td>21</td>\n",
" <td>81</td>\n",
" <td>3</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>9</td>\n",
" <td>16</td>\n",
" <td>11</td>\n",
" <td>1</td>\n",
" <td>34</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2481.862925</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2007.894873</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>554.072492</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>10.933713</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1120.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>1894.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2009.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2004.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2473.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2010.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2938.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2015.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>3827.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2022.000000</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id type attributes.created-at \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 1853 1853 \n",
"unique 1853 1 1218 \n",
"top 3226 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"freq 1 1853 636 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 1601 \n",
"unique 1853 1601 \n",
"top 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
"freq 1 1 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.name attributes.metadata.status \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 1853 \n",
"unique 1851 4 \n",
"top iDog ready \n",
"freq 2 1564 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.contacts attributes.metadata.homepage \\\n",
"count 1764 1853 \n",
"unique 1623 1853 \n",
"top [] http://sidc.be/silso/home \n",
"freq 40 1 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.identifier \\\n",
"count 1853.000000 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
"mean 2481.862925 \n",
"std 554.072492 \n",
"min 1120.000000 \n",
"25% 2009.000000 \n",
"50% 2473.000000 \n",
"75% 2938.000000 \n",
"max 3827.000000 \n",
"\n",
" attributes.metadata.description \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 \n",
"unique 1853 \n",
"top The WDC-SILSO is an activity of the Operationa... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.abbreviation \\\n",
"count 1671 \n",
"unique 1655 \n",
"top CGD \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.support-links \\\n",
2022-03-17 10:33:11 +01:00
"count 1663 \n",
"unique 1646 \n",
"top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... \n",
"freq 6 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.year-creation \\\n",
2022-03-17 10:33:11 +01:00
"count 1541.000000 \n",
"unique NaN \n",
"top NaN \n",
"freq NaN \n",
2022-03-17 10:33:11 +01:00
"mean 2007.894873 \n",
"std 10.933713 \n",
"min 1894.000000 \n",
"25% 2004.000000 \n",
"50% 2010.000000 \n",
2022-03-17 10:33:11 +01:00
"75% 2015.000000 \n",
"max 2022.000000 \n",
"\n",
" attributes.metadata.data-processes \\\n",
2022-03-17 10:33:11 +01:00
"count 1626 \n",
"unique 1625 \n",
"top [{'url': 'https://site.uit.no/dataverseno/abou... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.cross-references \\\n",
"count 790 \n",
"unique 790 \n",
"top [{'url': 'https://www.re3data.org/repository/r... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
"count 1853 1853 \n",
"unique 1799 1 \n",
"top [] Database \n",
"freq 55 1853 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.record-type attributes.subjects attributes.domains \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 1853 1853 \n",
"unique 3 935 1205 \n",
"top repository [Life Science] [] \n",
2022-03-17 10:33:11 +01:00
"freq 954 345 276 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" attributes.taxonomies attributes.user-defined-tags \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 1853 \n",
"unique 385 395 \n",
"top [All] [] \n",
2022-03-17 10:33:11 +01:00
"freq 528 1258 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.countries attributes.name \\\n",
"count 1853 1853 \n",
"unique 194 1851 \n",
"top [United States] FAIRsharing record for: iDog \n",
"freq 607 2 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.abbreviation \\\n",
2022-03-17 10:33:11 +01:00
"count 1671 \n",
"unique 1655 \n",
"top CGD \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.url \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 \n",
"unique 1853 \n",
"top https://fairsharing.org/10.25504/FAIRsharing.d... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.doi \\\n",
2022-03-17 10:33:11 +01:00
"count 1601 \n",
"unique 1601 \n",
"top 10.25504/FAIRsharing.d6423b \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.fairsharing-licence \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 \n",
"unique 1 \n",
"top https://creativecommons.org/licenses/by-sa/4.0... \n",
2022-03-17 10:33:11 +01:00
"freq 1853 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.description \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 \n",
"unique 1853 \n",
"top This FAIRsharing record describes: The WDC-SIL... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.publications attributes.licence-links \\\n",
2022-03-17 10:33:11 +01:00
"count 1853 1853 \n",
"unique 1135 1119 \n",
"top [] [] \n",
2022-03-17 10:33:11 +01:00
"freq 690 735 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.url-for-logo \\\n",
"count 18 \n",
"unique 18 \n",
"top /rails/active_storage/blobs/redirect/eyJfcmFpb... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.citations attributes.metadata.associated-tools \\\n",
"count 621 632 \n",
"unique 331 627 \n",
"top [] [] \n",
"freq 285 3 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" attributes.metadata.deprecation-reason \\\n",
"count 363 \n",
"unique 104 \n",
"top \n",
"freq 125 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-access-condition.type \\\n",
"count 42 \n",
"unique 2 \n",
"top open \n",
"freq 38 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-contact-information \\\n",
"count 47 \n",
"unique 2 \n",
"top yes \n",
"freq 45 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.url \\\n",
"count 22 \n",
"unique 22 \n",
"top https://bmrb.io/deposit/ \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.type \\\n",
"count 33 \n",
"unique 2 \n",
"top controlled \n",
"freq 21 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.deprecation-date \\\n",
"count 238 \n",
"unique 71 \n",
"top 2021-9-17 \n",
"freq 81 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.access-points \\\n",
2022-03-17 10:33:11 +01:00
"count 465 \n",
"unique 460 \n",
"top [{'url': 'https://heidata.uni-heidelberg.de/oa... \n",
"freq 3 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.data-access-condition.url \\\n",
"count 19 \n",
"unique 19 \n",
"top https://arch.library.northwestern.edu/about?lo... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.resource-sustainability.url \\\n",
"count 2 \n",
"unique 2 \n",
"top https://www.library.northwestern.edu/about/adm... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.resource-sustainability.name \\\n",
"count 2 \n",
"unique 2 \n",
"top Commitment to Sustainability: Level 1 \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.url \\\n",
"count 3 \n",
"unique 3 \n",
"top http://www.library.northwestern.edu/about/admi... \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.data-preservation-policy.name \\\n",
"count 3 \n",
"unique 3 \n",
"top Digital Preservation Policy: Level 1 \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-access-for-pre-publication-review \\\n",
"count 10 \n",
"unique 2 \n",
"top yes \n",
"freq 9 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-versioning \\\n",
"count 17 \n",
"unique 2 \n",
"top yes \n",
"freq 16 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-curation.type \\\n",
"count 22 \n",
"unique 4 \n",
"top manual \n",
"freq 11 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.data-curation.url \\\n",
"count 8 \n",
"unique 8 \n",
"top https://www.gbif.org/tools/data-validator/about \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.citation-to-related-publications \\\n",
"count 35 \n",
"unique 2 \n",
"top yes \n",
"freq 34 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"count 1 \n",
"unique 1 \n",
"top True \n",
"freq 1 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN "
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fairsharing_df.describe(include='all')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Subjects analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**re3data**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>orgIdentifier</th>\n",
" <th>subject</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>1 Humanities and Social Sciences</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>111 Social Sciences</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>11104 Political Science</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>112 Economics</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>12 Social and Behavioural Sciences</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>2791</th>\n",
" <td>r3d100013733</td>\n",
" <td>4 Engineering Sciences</td>\n",
" </tr>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>2792</th>\n",
" <td>r3d100013735</td>\n",
" <td>2 Life Sciences</td>\n",
" </tr>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>2792</th>\n",
" <td>r3d100013735</td>\n",
" <td>204 Microbiology, Virology and Immunology</td>\n",
" </tr>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>2792</th>\n",
" <td>r3d100013735</td>\n",
" <td>21 Biology</td>\n",
" </tr>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>2792</th>\n",
" <td>r3d100013735</td>\n",
" <td>22 Medicine</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2022-02-14 13:34:42 +01:00
"<p>17032 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
2022-02-14 13:34:42 +01:00
" orgIdentifier subject\n",
"0 r3d100000001 1 Humanities and Social Sciences\n",
"0 r3d100000001 111 Social Sciences\n",
"0 r3d100000001 11104 Political Science\n",
"0 r3d100000001 112 Economics\n",
"0 r3d100000001 12 Social and Behavioural Sciences\n",
"... ... ...\n",
"2791 r3d100013733 4 Engineering Sciences\n",
"2792 r3d100013735 2 Life Sciences\n",
"2792 r3d100013735 204 Microbiology, Virology and Immunology\n",
"2792 r3d100013735 21 Biology\n",
"2792 r3d100013735 22 Medicine\n",
"\n",
"[17032 rows x 2 columns]"
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"re3data_subjects = re3data_df[['orgIdentifier', 'subject']].explode('subject')\n",
"re3data_subjects['subject'] = re3data_subjects['subject'].apply(lambda x: x['name'] if x is not np.nan else np.nan)\n",
"re3data_subjects"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" <script type=\"text/javascript\">\n",
" window.PlotlyConfig = {MathJaxConfig: 'local'};\n",
" if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}\n",
" if (typeof require !== 'undefined') {\n",
" require.undef(\"plotly\");\n",
" define('plotly', function(require, exports, module) {\n",
" /**\n",
2022-03-17 10:33:11 +01:00
"* plotly.js v2.9.0\n",
"* Copyright 2012-2022, Plotly, Inc.\n",
"* All rights reserved.\n",
"* Licensed under the MIT license\n",
"*/\n",
2022-03-17 10:33:11 +01:00
"!function(t){if(\"object\"==typeof exports&&\"undefined\"!=typeof module)module.exports=t();else if(\"function\"==typeof define&&define.amd)define([],t);else{(\"undefined\"!=typeof window?window:\"undefined\"!=typeof global?global:\"undefined\"!=typeof self?self:this).Plotly=t()}}((function(){return function t(e,r,n){function i(o,s){if(!r[o]){if(!e[o]){var l=\"function\"==typeof require&&require;if(!s&&l)return l(o,!0);if(a)return a(o,!0);var c=new Error(\"Cannot find module '\"+o+\"'\");throw c.code=\"MODULE_NOT_FOUND\",c}var u=r[o]={exports:{}};e[o][0].call(u.exports,(function(t){return i(e[o][1][t]||t)}),u,u.exports,t,e,r,n)}return r[o].exports}for(var a=\"function\"==typeof require&&require,o=0;o<n.length;o++)i(n[o]);return i}({1:[function(t,e,r){\"use strict\";var n=t(\"../src/lib\"),i={\"X,X div\":'direction:ltr;font-family:\"Open Sans\",verdana,arial,sans-serif;margin:0;padding:0;',\"X input,X button\":'font-family:\"Open Sans\",verdana,arial,sans-serif;',\"X input:focus,X button:focus\":\"outline:none;\",\"X a\":\"text-decoration:none;\",\"X a:hover\":\"text-decoration:none;\",\"X .crisp\":\"shape-rendering:crispEdges;\",\"X .user-select-none\":\"-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;-o-user-select:none;user-select:none;\",\"X svg\":\"overflow:hidden;\",\"X svg a\":\"fill:#447adb;\",\"X svg a:hover\":\"fill:#3c6dc5;\",\"X .main-svg\":\"position:absolute;top:0;left:0;pointer-events:none;\",\"X .main-svg .draglayer\":\"pointer-events:all;\",\"X .cursor-default\":\"cursor:default;\",\"X .cursor-pointer\":\"cursor:pointer;\",\"X .cursor-crosshair\":\"cursor:crosshair;\",\"X .cursor-move\":\"cursor:move;\",\"X .cursor-col-resize\":\"cursor:col-resize;\",\"X .cursor-row-resize\":\"cursor:row-resize;\",\"X .cursor-ns-resize\":\"cursor:ns-resize;\",\"X .cursor-ew-resize\":\"cursor:ew-resize;\",\"X .cursor-sw-resize\":\"cursor:sw-resize;\",\"X .cursor-s-resize\":\"cursor:s-resize;\",\"X .cursor-se-resize\":\"cursor:se-resize;\",\"X 
.cursor-w-resize\":\"cursor:w-resize;\",\"X .cursor-e-resize\":\"cursor:e-resize;\",\"X .cursor-nw-resize\":\"cursor:nw-resize;\",\"X .cursor-n-resize\":\"cursor:n-resize;\",\"X .cursor-ne-resize\":\"cursor:ne-resize;\",\"X .cursor-grab\":\"cursor:-webkit-grab;cursor:grab;\",\"X .modebar\":\"position:absolute;top:2px;right:2px;\",\"X .ease-bg\":\"-webkit-transition:background-color .3s ease 0s;-moz-transition:background-color .3s ease 0s;-ms-transition:background-color .3s ease 0s;-o-transition:background-color .3s ease 0s;transition:background-color .3s ease 0s;\",\"X .modebar--hover>:not(.watermark)\":\"opacity:0;-webkit-transition:opacity .3s ease 0s;-moz-transition:opacity .3s ease 0s;-ms-transition:opacity .3s ease 0s;-o-transition:opacity .3s ease 0s;transition:opacity .3s ease 0s;\",\"X:hover .modebar--hover .modebar-group\":\"opacity:1;\",\"X .modebar-group\":\"float:left;display:inline-block;box-sizing:border-box;padding-left:8px;position:relative;vertical-align:middle;white-space:nowrap;\",\"X .modebar-btn\":\"position:relative;font-size:16px;padding:3px 4px;height:22px;cursor:pointer;line-height:normal;box-sizing:border-box;\",\"X .modebar-btn svg\":\"position:relative;top:2px;\",\"X .modebar.vertical\":\"display:flex;flex-direction:column;flex-wrap:wrap;align-content:flex-end;max-height:100%;\",\"X .modebar.vertical svg\":\"top:-1px;\",\"X .modebar.vertical .modebar-group\":\"display:block;float:none;padding-left:0px;padding-bottom:8px;\",\"X .modebar.vertical .modebar-group .modebar-btn\":\"display:block;text-align:center;\",\"X [data-title]:before,X [data-title]:after\":\"position:absolute;-webkit-transform:translate3d(0, 0, 0);-moz-transform:translate3d(0, 0, 0);-ms-transform:translate3d(0, 0, 0);-o-transform:translate3d(0, 0, 0);transform:translate3d(0, 0, 0);display:none;opacity:0;z-index:1001;pointer-events:none;top:110%;right:50%;\",\"X [data-title]:hover:before,X [data-title]:hover:after\":\"display:block;opacity:1;\",\"X 
[data-title]:before\":'content:\"\";position:absolute;background:transparent;border:6px solid transparent;z-index:10
"/*!\n",
" * The buffer module from node.js, for the browser.\n",
" *\n",
" * @author Feross Aboukhadijeh <feross@feross.org> <http://feross.org>\n",
" * @license MIT\n",
2022-02-14 13:34:42 +01:00
" */function i(t,e){if(t===e)return 0;for(var r=t.length,n=e.length,i=0,a=Math.min(r,n);i<a;++i)if(t[i]!==e[i]){r=t[i],n=e[i];break}return r<n?-1:n<r?1:0}function a(t){return r.Buffer&&\"function\"==typeof r.Buffer.isBuffer?r.Buffer.isBuffer(t):!(null==t||!t._isBuffer)}var o=t(\"util/\"),s=Object.prototype.hasOwnProperty,l=Array.prototype.slice,c=\"foo\"===function(){}.name;function u(t){return Object.prototype.toString.call(t)}function f(t){return!a(t)&&(\"function\"==typeof r.ArrayBuffer&&(\"function\"==typeof ArrayBuffer.isView?ArrayBuffer.isView(t):!!t&&(t instanceof DataView||!!(t.buffer&&t.buffer instanceof ArrayBuffer))))}var h=e.exports=y,p=/\\s*function\\s+([^\\(\\s]*)\\s*/;function d(t){if(o.isFunction(t)){if(c)return t.name;var e=t.toString().match(p);return e&&e[1]}}function g(t,e){return\"string\"==typeof t?t.length<e?t:t.slice(0,e):t}function m(t){if(c||!o.isFunction(t))return o.inspect(t);var e=d(t);return\"[Function\"+(e?\": \"+e:\"\")+\"]\"}function v(t,e,r,n,i){throw new h.AssertionError({message:r,actual:t,expected:e,operator:n,stackStartFunction:i})}function y(t,e){t||v(t,!0,e,\"==\",h.ok)}function x(t,e,r,n){if(t===e)return!0;if(a(t)&&a(e))return 0===i(t,e);if(o.isDate(t)&&o.isDate(e))return t.getTime()===e.getTime();if(o.isRegExp(t)&&o.isRegExp(e))return t.source===e.source&&t.global===e.global&&t.multiline===e.multiline&&t.lastIndex===e.lastIndex&&t.ignoreCase===e.ignoreCase;if(null!==t&&\"object\"==typeof t||null!==e&&\"object\"==typeof e){if(f(t)&&f(e)&&u(t)===u(e)&&!(t instanceof Float32Array||t instanceof Float64Array))return 0===i(new Uint8Array(t.buffer),new Uint8Array(e.buffer));if(a(t)!==a(e))return!1;var s=(n=n||{actual:[],expected:[]}).actual.indexOf(t);return-1!==s&&s===n.expected.indexOf(e)||(n.actual.push(t),n.expected.push(e),function(t,e,r,n){if(null==t||null==e)return!1;if(o.isPrimitive(t)||o.isPrimitive(e))return t===e;if(r&&Object.getPrototypeOf(t)!==Object.getPrototypeOf(e))return!1;var 
i=b(t),a=b(e);if(i&&!a||!i&&a)return!1;if(i)return t=l.call(t),e=l.call(e),x(t,e,r);var s,c,u=T(t),f=T(e);if(u.length!==f.length)return!1;for(u.sort(),f.sort(),c=u.length-1;c>=0;c--)if(u[c]!==f[c])return!1;for(c=u.length-1;c>=0;c--)if(s=u[c],!x(t[s],e[s],r,n))return!1;return!0}(t,e,r,n))}return r?t===e:t==e}function b(t){return\"[object Arguments]\"==Object.prototype.toString.call(t)}function _(t,e){if(!t||!e)return!1;if(\"[object RegExp]\"==Object.prototype.toString.call(e))return e.test(t);try{if(t instanceof e)return!0}catch(t){}return!Error.isPrototypeOf(e)&&!0===e.call({},t)}function w(t,e,r,n){var i;if(\"function\"!=typeof e)throw new TypeError('\"block\" argument must be a function');\"string\"==typeof r&&(n=r,r=null),i=function(t){var e;try{t()}catch(t){e=t}return e}(e),n=(r&&r.name?\" (\"+r.name+\").\":\".\")+(n?\" \"+n:\".\"),t&&!i&&v(i,r,\"Missing expected exception\"+n);var a=\"string\"==typeof n,s=!t&&i&&!r;if((!t&&o.isError(i)&&a&&_(i,r)||s)&&v(i,r,\"Got unwanted exception\"+n),t&&i&&r&&!_(i,r)||!t&&i)throw i}h.AssertionError=function(t){this.name=\"AssertionError\",this.actual=t.actual,this.expected=t.expected,this.operator=t.operator,t.message?(this.message=t.message,this.generatedMessage=!1):(this.message=function(t){return g(m(t.actual),128)+\" \"+t.operator+\" \"+g(m(t.expected),128)}(this),this.generatedMessage=!0);var e=t.stackStartFunction||v;if(Error.captureStackTrace)Error.captureStackTrace(this,e);else{var r=new Error;if(r.stack){var n=r.stack,i=d(e),a=n.indexOf(\"\\n\"+i);if(a>=0){var 
o=n.indexOf(\"\\n\",a+1);n=n.substring(o+1)}this.stack=n}}},o.inherits(h.AssertionError,Error),h.fail=v,h.ok=y,h.equal=function(t,e,r){t!=e&&v(t,e,r,\"==\",h.equal)},h.notEqual=function(t,e,r){t==e&&v(t,e,r,\"!=\",h.notEqual)},h.deepEqual=function(t,e,r){x(t,e,!1)||v(t,e,r,\"deepEqual\",h.deepEqual)},h.deepStrictEqual=function(t,e,r){x(t,e,!0)||v(t,e,r,\"deepStrictEqual\",h.deepStrictEqual)},h.notDeepEqual=function(t,e,r){x(t,e,!1)&&v(t,e,r,\"notDeepEqual\",h.notDeepEqual)},h.notDeepStrictEqual=function t(e,r,n){x(e,r,!0)&&v(e,r,n,\"notDeepStrictEqual\",t)},h.strictEqual=function(t,e
"/*!\n",
" * The buffer module from node.js, for the browser.\n",
" *\n",
" * @author Feross Aboukhadijeh <https://feross.org>\n",
" * @license MIT\n",
" */\n",
2022-02-14 13:34:42 +01:00
"\"use strict\";var e=t(\"base64-js\"),n=t(\"ieee754\");r.Buffer=a,r.SlowBuffer=function(t){+t!=t&&(t=0);return a.alloc(+t)},r.INSPECT_MAX_BYTES=50;function i(t){if(t>2147483647)throw new RangeError('The value \"'+t+'\" is invalid for option \"size\"');var e=new Uint8Array(t);return e.__proto__=a.prototype,e}function a(t,e,r){if(\"number\"==typeof t){if(\"string\"==typeof e)throw new TypeError('The \"string\" argument must be of type string. Received type number');return l(t)}return o(t,e,r)}function o(t,e,r){if(\"string\"==typeof t)return function(t,e){\"string\"==typeof e&&\"\"!==e||(e=\"utf8\");if(!a.isEncoding(e))throw new TypeError(\"Unknown encoding: \"+e);var r=0|f(t,e),n=i(r),o=n.write(t,e);o!==r&&(n=n.slice(0,o));return n}(t,e);if(ArrayBuffer.isView(t))return c(t);if(null==t)throw TypeError(\"The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type \"+typeof t);if(B(t,ArrayBuffer)||t&&B(t.buffer,ArrayBuffer))return function(t,e,r){if(e<0||t.byteLength<e)throw new RangeError('\"offset\" is outside of buffer bounds');if(t.byteLength<e+(r||0))throw new RangeError('\"length\" is outside of buffer bounds');var n;n=void 0===e&&void 0===r?new Uint8Array(t):void 0===r?new Uint8Array(t,e):new Uint8Array(t,e,r);return n.__proto__=a.prototype,n}(t,e,r);if(\"number\"==typeof t)throw new TypeError('The \"value\" argument must not be of type number. 
Received type number');var n=t.valueOf&&t.valueOf();if(null!=n&&n!==t)return a.from(n,e,r);var o=function(t){if(a.isBuffer(t)){var e=0|u(t.length),r=i(e);return 0===r.length||t.copy(r,0,0,e),r}if(void 0!==t.length)return\"number\"!=typeof t.length||N(t.length)?i(0):c(t);if(\"Buffer\"===t.type&&Array.isArray(t.data))return c(t.data)}(t);if(o)return o;if(\"undefined\"!=typeof Symbol&&null!=Symbol.toPrimitive&&\"function\"==typeof t[Symbol.toPrimitive])return a.from(t[Symbol.toPrimitive](\"string\"),e,r);throw new TypeError(\"The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type \"+typeof t)}function s(t){if(\"number\"!=typeof t)throw new TypeError('\"size\" argument must be of type number');if(t<0)throw new RangeError('The value \"'+t+'\" is invalid for option \"size\"')}function l(t){return s(t),i(t<0?0:0|u(t))}function c(t){for(var e=t.length<0?0:0|u(t.length),r=i(e),n=0;n<e;n+=1)r[n]=255&t[n];return r}function u(t){if(t>=2147483647)throw new RangeError(\"Attempt to allocate Buffer larger than maximum size: 0x\"+2147483647..toString(16)+\" bytes\");return 0|t}function f(t,e){if(a.isBuffer(t))return t.length;if(ArrayBuffer.isView(t)||B(t,ArrayBuffer))return t.byteLength;if(\"string\"!=typeof t)throw new TypeError('The \"string\" argument must be one of type string, Buffer, or ArrayBuffer. 
Received type '+typeof t);var r=t.length,n=arguments.length>2&&!0===arguments[2];if(!n&&0===r)return 0;for(var i=!1;;)switch(e){case\"ascii\":case\"latin1\":case\"binary\":return r;case\"utf8\":case\"utf-8\":return D(t).length;case\"ucs2\":case\"ucs-2\":case\"utf16le\":case\"utf-16le\":return 2*r;case\"hex\":return r>>>1;case\"base64\":return R(t).length;default:if(i)return n?-1:D(t).length;e=(\"\"+e).toLowerCase(),i=!0}}function h(t,e,r){var n=!1;if((void 0===e||e<0)&&(e=0),e>this.length)return\"\";if((void 0===r||r>this.length)&&(r=this.length),r<=0)return\"\";if((r>>>=0)<=(e>>>=0))return\"\";for(t||(t=\"utf8\");;)switch(t){case\"hex\":return M(this,e,r);case\"utf8\":case\"utf-8\":return T(this,e,r);case\"ascii\":return k(this,e,r);case\"latin1\":case\"binary\":return A(this,e,r);case\"base64\":return w(this,e,r);case\"ucs2\":case\"ucs-2\":case\"utf16le\":case\"utf-16le\":return S(this,e,r);default:if(n)throw new TypeError(\"Unknown encoding: \"+t);t=(t+\"\").toLowerCase(),n=!0}}function p(t,e,r){var n=t[e];t[e]=t[r],t[r]=n}function d(t,e,r,n,i){if(0===t.length)return-1;if(\"string\"==typeof r?(n=r,r=0):r>2147483647?r=2147483647:r<-2147483648&&(r=-2147483648),N(r=+r)&&(r=i?0:t.length-1),r<0&&(r=t.length+r),r>=t.length){if(i)return-1;r=t.length-1}else if(r<0){if(!i)return-1;r=0}i
"/*! Native Promise Only\n",
" v0.8.1 (c) Kyle Simpson\n",
" MIT License: http://getify.mit-license.org\n",
"*/\n",
"!function(t,r,n){r[t]=r[t]||n(),void 0!==e&&e.exports&&(e.exports=r[t])}(\"Promise\",void 0!==t?t:this,(function(){\"use strict\";var t,e,n,i=Object.prototype.toString,a=void 0!==r?function(t){return r(t)}:setTimeout;try{Object.defineProperty({},\"x\",{}),t=function(t,e,r,n){return Object.defineProperty(t,e,{value:r,writable:!0,configurable:!1!==n})}}catch(e){t=function(t,e,r){return t[e]=r,t}}function o(t,r){n.add(t,r),e||(e=a(n.drain))}function s(t){var e,r=typeof t;return null==t||\"object\"!=r&&\"function\"!=r||(e=t.then),\"function\"==typeof e&&e}function l(){for(var t=0;t<this.chain.length;t++)c(this,1===this.state?this.chain[t].success:this.chain[t].failure,this.chain[t]);this.chain.length=0}function c(t,e,r){var n,i;try{!1===e?r.reject(t.msg):(n=!0===e?t.msg:e.call(void 0,t.msg))===r.promise?r.reject(TypeError(\"Promise-chain cycle\")):(i=s(n))?i.call(n,r.resolve,r.reject):r.resolve(n)}catch(t){r.reject(t)}}function u(t){var e,r=this;if(!r.triggered){r.triggered=!0,r.def&&(r=r.def);try{(e=s(t))?o((function(){var n=new p(r);try{e.call(t,(function(){u.apply(n,arguments)}),(function(){f.apply(n,arguments)}))}catch(t){f.call(n,t)}})):(r.msg=t,r.state=1,r.chain.length>0&&o(l,r))}catch(t){f.call(new p(r),t)}}}function f(t){var e=this;e.triggered||(e.triggered=!0,e.def&&(e=e.def),e.msg=t,e.state=2,e.chain.length>0&&o(l,e))}function h(t,e,r,n){for(var i=0;i<e.length;i++)!function(i){t.resolve(e[i]).then((function(t){r(i,t)}),n)}(i)}function p(t){this.def=t,this.triggered=!1}function d(t){this.promise=t,this.state=0,this.triggered=!1,this.chain=[],this.msg=void 0}function g(t){if(\"function\"!=typeof t)throw TypeError(\"Not a function\");if(0!==this.__NPO__)throw TypeError(\"Not a promise\");this.__NPO__=1;var e=new d(this);this.then=function(t,r){var n={success:\"function\"!=typeof t||t,failure:\"function\"==typeof r&&r};return n.promise=new this.constructor((function(t,e){if(\"function\"!=typeof t||\"function\"!=typeof e)throw TypeError(\"Not a 
function\");n.resolve=t,n.reject=e})),e.chain.push(n),0!==e.state&&o(l,e),n.promise},this.catch=function(t){return this.then(void 0,t)};try{t.call(void 0,(function(t){u.call(e,t)}),(function(t){f.call(e,t)}))}catch(t){f.call(e,t)}}n=function(){var t,r,n;function i(t,e){this.fn=t,this.self=e,this.next=void 0}return{add:function(e,a){n=new i(e,a),r?r.next=n:t=n,r=n,n=void 0},drain:function(){var n=t;for(t=r=e=void 0;n;)n.fn.call(n.self),n=n.next}}}();var m=t({},\"constructor\",g,!1);return g.prototype=m,t(m,\"__NPO__\",0,!1),t(g,\"resolve\",(function(t){return t&&\"object\"==typeof t&&1===t.__NPO__?t:new this((function(e,r){if(\"function\"!=typeof e||\"function\"!=typeof r)throw TypeError(\"Not a function\");e(t)}))})),t(g,\"reject\",(function(t){return new this((function(e,r){if(\"function\"!=typeof e||\"function\"!=typeof r)throw TypeError(\"Not a function\");r(t)}))})),t(g,\"all\",(function(t){var e=this;return\"[object Array]\"!=i.call(t)?e.reject(TypeError(\"Not an array\")):0===t.length?e.resolve([]):new e((function(r,n){if(\"function\"!=typeof r||\"function\"!=typeof n)throw TypeError(\"Not a function\");var i=t.length,a=Array(i),o=0;h(e,t,(function(t,e){a[t]=e,++o===i&&r(a)}),n)}))})),t(g,\"race\",(function(t){var e=this;return\"[object Array]\"!=i.call(t)?e.reject(TypeError(\"Not an array\")):new e((function(r,n){if(\"function\"!=typeof r||\"function\"!=typeof n)throw TypeError(\"Not a function\");h(e,t,(function(t,e){r(e)}),n)}))})),g}))}).call(this)}).call(this,\"undefined\"!=typeof global?global:\"undefined\"!=typeof self?self:\"undefined\"!=typeof window?window:{},t(\"timers\").setImmediate)},{timers:311}],246:[function(t,e,r){var n=Math.PI,i=c(120);function a(t,e,r,n){return[\"C\",t,e,r,n,r,n]}function o(t,e,r,n,i,a){return[\"C\",t/3+2/3*r,e/3+2/3*n,i/3+2/3*r,a/3+2/3*n,i,a]}function s(t,e,r,a,o,c,u,f,h,p){if(p)T=p[0],k=p[1],_=p[2],w=p[3];else{var d=l(t,e,-o);t=d.x,e=d.y;var 
g=(t-(f=(d=l(f,h,-o)).x))/2,m=(e-(h=d.y))/2,v=g*g/(r*r)+m*m/(a*a);v>1&&(r*=v=Math.sqrt(v),a*=v);var y=r*r,x=a*a,b=(c==u?-1:1)*Math.sqrt(Math.abs((y*x-y*m*m-x*g*g)/(y*m*m+x*g*g)));b==1/0&&(
"/*\n",
"object-assign\n",
"(c) Sindre Sorhus\n",
"@license MIT\n",
"*/\n",
"\"use strict\";var n=Object.getOwnPropertySymbols,i=Object.prototype.hasOwnProperty,a=Object.prototype.propertyIsEnumerable;function o(t){if(null==t)throw new TypeError(\"Object.assign cannot be called with null or undefined\");return Object(t)}e.exports=function(){try{if(!Object.assign)return!1;var t=new String(\"abc\");if(t[5]=\"de\",\"5\"===Object.getOwnPropertyNames(t)[0])return!1;for(var e={},r=0;r<10;r++)e[\"_\"+String.fromCharCode(r)]=r;if(\"0123456789\"!==Object.getOwnPropertyNames(e).map((function(t){return e[t]})).join(\"\"))return!1;var n={};return\"abcdefghijklmnopqrst\".split(\"\").forEach((function(t){n[t]=t})),\"abcdefghijklmnopqrst\"===Object.keys(Object.assign({},n)).join(\"\")}catch(t){return!1}}()?Object.assign:function(t,e){for(var r,s,l=o(t),c=1;c<arguments.length;c++){for(var u in r=Object(arguments[c]))i.call(r,u)&&(l[u]=r[u]);if(n){s=n(r);for(var f=0;f<s.length;f++)a.call(r,s[f])&&(l[s[f]]=r[s[f]])}}return l}},{}],248:[function(t,e,r){\"use strict\";function n(t,e){if(\"string\"!=typeof t)return[t];var r=[t];\"string\"==typeof e||Array.isArray(e)?e={brackets:e}:e||(e={});var n=e.brackets?Array.isArray(e.brackets)?e.brackets:[e.brackets]:[\"{}\",\"[]\",\"()\"],i=e.escape||\"___\",a=!!e.flat;n.forEach((function(t){var e=new RegExp([\"\\\\\",t[0],\"[^\\\\\",t[0],\"\\\\\",t[1],\"]*\\\\\",t[1]].join(\"\")),n=[];function a(e,a,o){var s=r.push(e.slice(t[0].length,-t[1].length))-1;return n.push(s),i+s+i}r.forEach((function(t,n){for(var i,o=0;t!=i;)if(i=t,t=t.replace(e,a),o++>1e4)throw Error(\"References have circular dependency. 
Please, check them.\");r[n]=t})),n=n.reverse(),r=r.map((function(e){return n.forEach((function(r){e=e.replace(new RegExp(\"(\\\\\"+i+r+\"\\\\\"+i+\")\",\"g\"),t[0]+\"$1\"+t[1])})),e}))}));var o=new RegExp(\"\\\\\"+i+\"([0-9]+)\\\\\"+i);return a?r:function t(e,r,n){for(var i,a=[],s=0;i=o.exec(e);){if(s++>1e4)throw Error(\"Circular references in parenthesis\");a.push(e.slice(0,i.index)),a.push(t(r[i[1]],r)),e=e.slice(i.index+i[0].length)}return a.push(e),a}(r[0],r)}function i(t,e){if(e&&e.flat){var r,n=e&&e.escape||\"___\",i=t[0];if(!i)return\"\";for(var a=new RegExp(\"\\\\\"+n+\"([0-9]+)\\\\\"+n),o=0;i!=r;){if(o++>1e4)throw Error(\"Circular references in \"+t);r=i,i=i.replace(a,s)}return i}return t.reduce((function t(e,r){return Array.isArray(r)&&(r=r.reduce(t,\"\")),e+r}),\"\");function s(e,r){if(null==t[r])throw Error(\"Reference \"+r+\"is undefined\");return t[r]}}function a(t,e){return Array.isArray(t)?i(t,e):n(t,e)}a.parse=n,a.stringify=i,e.exports=a},{}],249:[function(t,e,r){\"use strict\";var n=t(\"pick-by-alias\");e.exports=function(t){var e;arguments.length>1&&(t=arguments);\"string\"==typeof t?t=t.split(/\\s/).map(parseFloat):\"number\"==typeof t&&(t=[t]);t.length&&\"number\"==typeof t[0]?e=1===t.length?{width:t[0],height:t[0],x:0,y:0}:2===t.length?{width:t[0],height:t[1],x:0,y:0}:{x:t[0],y:t[1],width:t[2]-t[0]||0,height:t[3]-t[1]||0}:t&&(t=n(t,{left:\"x l left Left\",top:\"y t top Top\",width:\"w width W Width\",height:\"h height W Width\",bottom:\"b bottom Bottom\",right:\"r right Right\"}),e={x:t.left||0,y:t.top||0},null==t.width?t.right?e.width=t.right-e.x:e.width=0:e.width=t.width,null==t.height?t.bottom?e.height=t.bottom-e.y:e.height=0:e.height=t.height);return e}},{\"pick-by-alias\":253}],250:[function(t,e,r){e.exports=function(t){var e=[];return t.replace(i,(function(t,r,i){var o=r.toLowerCase();for(i=function(t){var e=t.match(a);return 
e?e.map(Number):[]}(i),\"m\"==o&&i.length>2&&(e.push([r].concat(i.splice(0,2))),o=\"l\",r=\"m\"==r?\"l\":\"L\");;){if(i.length==n[o])return i.unshift(r),e.push(i);if(i.length<n[o])throw new Error(\"malformed path data\");e.push([r].concat(i.splice(0,n[o])))}})),e};var n={a:7,c:6,h:1,l:2,m:2,q:4,s:4,t:2,v:1,z:0},i=/([astvzqmhlc])([^astvzqmhlc]*)/gi;var a=/-?[0-9]*\\.?[0-9]+(?:e[-+]?\\d+)?/gi},{}],251:[function(t,e,r){e.exports=function(t,e){e||(e=[0,\"\"]),t=String(t);var r=parseFloat(t,10);return e[0]=r,e[1]=t.match(/[\\d.\\-\\+]*\\s*(.*)/)[1]||\"\",e}},{}],252:[function(t,e,r){(function(t){(function(){(function(){var r,n,
"/*\n",
" * @copyright 2016 Sean Connelly (@voidqk), http://syntheti.cc\n",
" * @license MIT\n",
" * @preserve Project Home: https://github.com/voidqk/polybooljs\n",
" */\n",
2022-03-17 10:33:11 +01:00
"var n,i=t(\"./lib/build-log\"),a=t(\"./lib/epsilon\"),o=t(\"./lib/intersecter\"),s=t(\"./lib/segment-chainer\"),l=t(\"./lib/segment-selector\"),c=t(\"./lib/geojson\"),u=!1,f=a();function h(t,e,r){var i=n.segments(t),a=n.segments(e),o=r(n.combine(i,a));return n.polygon(o)}n={buildLog:function(t){return!0===t?u=i():!1===t&&(u=!1),!1!==u&&u.list},epsilon:function(t){return f.epsilon(t)},segments:function(t){var e=o(!0,f,u);return t.regions.forEach(e.addRegion),{segments:e.calculate(t.inverted),inverted:t.inverted}},combine:function(t,e){return{combined:o(!1,f,u).calculate(t.segments,t.inverted,e.segments,e.inverted),inverted1:t.inverted,inverted2:e.inverted}},selectUnion:function(t){return{segments:l.union(t.combined,u),inverted:t.inverted1||t.inverted2}},selectIntersect:function(t){return{segments:l.intersect(t.combined,u),inverted:t.inverted1&&t.inverted2}},selectDifference:function(t){return{segments:l.difference(t.combined,u),inverted:t.inverted1&&!t.inverted2}},selectDifferenceRev:function(t){return{segments:l.differenceRev(t.combined,u),inverted:!t.inverted1&&t.inverted2}},selectXor:function(t){return{segments:l.xor(t.combined,u),inverted:t.inverted1!==t.inverted2}},polygon:function(t){return{regions:s(t.segments,f,u),inverted:t.inverted}},polygonFromGeoJSON:function(t){return c.toPolygon(n,t)},polygonToGeoJSON:function(t){return c.fromPolygon(n,f,t)},union:function(t,e){return h(t,e,n.selectUnion)},intersect:function(t,e){return h(t,e,n.selectIntersect)},difference:function(t,e){return h(t,e,n.selectDifference)},differenceRev:function(t,e){return h(t,e,n.selectDifferenceRev)},xor:function(t,e){return h(t,e,n.selectXor)}},\"object\"==typeof window&&(window.PolyBool=n),e.exports=n},{\"./lib/build-log\":255,\"./lib/epsilon\":256,\"./lib/geojson\":257,\"./lib/intersecter\":258,\"./lib/segment-chainer\":260,\"./lib/segment-selector\":261}],255:[function(t,e,r){e.exports=function(){var t,e=0,r=!1;function n(e,r){return 
t.list.push({type:e,data:r?JSON.parse(JSON.stringify(r)):void 0}),t}return t={list:[],segmentId:function(){return e++},checkIntersection:function(t,e){return n(\"check\",{seg1:t,seg2:e})},segmentChop:function(t,e){return n(\"div_seg\",{seg:t,pt:e}),n(\"chop\",{seg:t,pt:e})},statusRemove:function(t){return n(\"pop_seg\",{seg:t})},segmentUpdate:function(t){return n(\"seg_update\",{seg:t})},segmentNew:function(t,e){return n(\"new_seg\",{seg:t,primary:e})},segmentRemove:function(t){return n(\"rem_seg\",{seg:t})},tempStatus:function(t,e,r){return n(\"temp_status\",{seg:t,above:e,below:r})},rewind:function(t){return n(\"rewind\",{seg:t})},status:function(t,e,r){return n(\"status\",{seg:t,above:e,below:r})},vert:function(e){return e===r?t:(r=e,n(\"vert\",{x:e}))},log:function(t){return\"string\"!=typeof t&&(t=JSON.stringify(t,!1,\" \")),n(\"log\",{txt:t})},reset:function(){return n(\"reset\")},selected:function(t){return n(\"selected\",{segs:t})},chainStart:function(t){return n(\"chain_start\",{seg:t})},chainRemoveHead:function(t,e){return n(\"chain_rem_head\",{index:t,pt:e})},chainRemoveTail:function(t,e){return n(\"chain_rem_tail\",{index:t,pt:e})},chainNew:function(t,e){return n(\"chain_new\",{pt1:t,pt2:e})},chainMatch:function(t){return n(\"chain_match\",{index:t})},chainClose:function(t){return n(\"chain_close\",{index:t})},chainAddHead:function(t,e){return n(\"chain_add_head\",{index:t,pt:e})},chainAddTail:function(t,e){return n(\"chain_add_tail\",{index:t,pt:e})},chainConnect:function(t,e){return n(\"chain_con\",{index1:t,index2:e})},chainReverse:function(t){return n(\"chain_rev\",{index:t})},chainJoin:function(t,e){return n(\"chain_join\",{index1:t,index2:e})},done:function(){return n(\"done\")}}}},{}],256:[function(t,e,r){e.exports=function(t){\"number\"!=typeof t&&(t=1e-10);var e={epsilon:function(e){return\"number\"==typeof e&&(t=e),t},pointAboveOrOnLine:function(e,r,n){var 
i=r[0],a=r[1],o=n[0],s=n[1],l=e[0];return(o-i)*(e[1]-a)-(s-a)*(l-i)>=-t},pointBetween:function(e,r,n){var i=e[1]-r[1],a=n[0]-r[0],o=e[0]-r[0],s=n[1]-r[1],l=o*a+i*s;return!(l<t)&&!(l-(a*a+s*s)>-t)},pointsSameX:function(e,r){r
"/*!\n",
2022-02-14 13:34:42 +01:00
" * The buffer module from node.js, for the browser.\n",
" *\n",
" * @author Feross Aboukhadijeh <https://feross.org>\n",
" * @license MIT\n",
" */\n",
2022-02-14 13:34:42 +01:00
"\"use strict\";var e=t(\"base64-js\"),n=t(\"ieee754\");r.Buffer=a,r.SlowBuffer=function(t){+t!=t&&(t=0);return a.alloc(+t)},r.INSPECT_MAX_BYTES=50;function i(t){if(t>2147483647)throw new RangeError('The value \"'+t+'\" is invalid for option \"size\"');var e=new Uint8Array(t);return e.__proto__=a.prototype,e}function a(t,e,r){if(\"number\"==typeof t){if(\"string\"==typeof e)throw new TypeError('The \"string\" argument must be of type string. Received type number');return l(t)}return o(t,e,r)}function o(t,e,r){if(\"string\"==typeof t)return function(t,e){\"string\"==typeof e&&\"\"!==e||(e=\"utf8\");if(!a.isEncoding(e))throw new TypeError(\"Unknown encoding: \"+e);var r=0|f(t,e),n=i(r),o=n.write(t,e);o!==r&&(n=n.slice(0,o));return n}(t,e);if(ArrayBuffer.isView(t))return c(t);if(null==t)throw TypeError(\"The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type \"+typeof t);if(B(t,ArrayBuffer)||t&&B(t.buffer,ArrayBuffer))return function(t,e,r){if(e<0||t.byteLength<e)throw new RangeError('\"offset\" is outside of buffer bounds');if(t.byteLength<e+(r||0))throw new RangeError('\"length\" is outside of buffer bounds');var n;n=void 0===e&&void 0===r?new Uint8Array(t):void 0===r?new Uint8Array(t,e):new Uint8Array(t,e,r);return n.__proto__=a.prototype,n}(t,e,r);if(\"number\"==typeof t)throw new TypeError('The \"value\" argument must not be of type number. 
Received type number');var n=t.valueOf&&t.valueOf();if(null!=n&&n!==t)return a.from(n,e,r);var o=function(t){if(a.isBuffer(t)){var e=0|u(t.length),r=i(e);return 0===r.length||t.copy(r,0,0,e),r}if(void 0!==t.length)return\"number\"!=typeof t.length||N(t.length)?i(0):c(t);if(\"Buffer\"===t.type&&Array.isArray(t.data))return c(t.data)}(t);if(o)return o;if(\"undefined\"!=typeof Symbol&&null!=Symbol.toPrimitive&&\"function\"==typeof t[Symbol.toPrimitive])return a.from(t[Symbol.toPrimitive](\"string\"),e,r);throw new TypeError(\"The first argument must be one of type string, Buffer, ArrayBuffer, Array, or Array-like Object. Received type \"+typeof t)}function s(t){if(\"number\"!=typeof t)throw new TypeError('\"size\" argument must be of type number');if(t<0)throw new RangeError('The value \"'+t+'\" is invalid for option \"size\"')}function l(t){return s(t),i(t<0?0:0|u(t))}function c(t){for(var e=t.length<0?0:0|u(t.length),r=i(e),n=0;n<e;n+=1)r[n]=255&t[n];return r}function u(t){if(t>=2147483647)throw new RangeError(\"Attempt to allocate Buffer larger than maximum size: 0x\"+2147483647..toString(16)+\" bytes\");return 0|t}function f(t,e){if(a.isBuffer(t))return t.length;if(ArrayBuffer.isView(t)||B(t,ArrayBuffer))return t.byteLength;if(\"string\"!=typeof t)throw new TypeError('The \"string\" argument must be one of type string, Buffer, or ArrayBuffer. 
Received type '+typeof t);var r=t.length,n=arguments.length>2&&!0===arguments[2];if(!n&&0===r)return 0;for(var i=!1;;)switch(e){case\"ascii\":case\"latin1\":case\"binary\":return r;case\"utf8\":case\"utf-8\":return D(t).length;case\"ucs2\":case\"ucs-2\":case\"utf16le\":case\"utf-16le\":return 2*r;case\"hex\":return r>>>1;case\"base64\":return R(t).length;default:if(i)return n?-1:D(t).length;e=(\"\"+e).toLowerCase(),i=!0}}function h(t,e,r){var n=!1;if((void 0===e||e<0)&&(e=0),e>this.length)return\"\";if((void 0===r||r>this.length)&&(r=this.length),r<=0)return\"\";if((r>>>=0)<=(e>>>=0))return\"\";for(t||(t=\"utf8\");;)switch(t){case\"hex\":return M(this,e,r);case\"utf8\":case\"utf-8\":return T(this,e,r);case\"ascii\":return k(this,e,r);case\"latin1\":case\"binary\":return A(this,e,r);case\"base64\":return w(this,e,r);case\"ucs2\":case\"ucs-2\":case\"utf16le\":case\"utf-16le\":return S(this,e,r);default:if(n)throw new TypeError(\"Unknown encoding: \"+t);t=(t+\"\").toLowerCase(),n=!0}}function p(t,e,r){var n=t[e];t[e]=t[r],t[r]=n}function d(t,e,r,n,i){if(0===t.length)return-1;if(\"string\"==typeof r?(n=r,r=0):r>2147483647?r=2147483647:r<-2147483648&&(r=-2147483648),N(r=+r)&&(r=i?0:t.length-1),r<0&&(r=t.length+r),r>=t.length){if(i)return-1;r=t.length-1}else if(r<0){if(!i)return-1;r=0}i
"/*!\n",
" * Determine if an object is a Buffer\n",
" *\n",
" * @author Feross Aboukhadijeh <https://feross.org>\n",
" * @license MIT\n",
" */\n",
2022-02-14 13:34:42 +01:00
"e.exports=function(t){return null!=t&&(n(t)||function(t){return\"function\"==typeof t.readFloatLE&&\"function\"==typeof t.slice&&n(t.slice(0,0))}(t)||!!t._isBuffer)}},{}],238:[function(t,e,r){\"use strict\";e.exports=a,e.exports.isMobile=a,e.exports.default=a;var n=/(android|bb\\d+|meego).+mobile|avantgo|bada\\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|series[46]0|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i,i=/(android|bb\\d+|meego).+mobile|avantgo|bada\\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\\/|plucker|pocket|psp|series[46]0|symbian|treo|up\\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino|android|ipad|playbook|silk/i;function a(t){t||(t={});var e=t.ua;if(e||\"undefined\"==typeof navigator||(e=navigator.userAgent),e&&e.headers&&\"string\"==typeof e.headers[\"user-agent\"]&&(e=e.headers[\"user-agent\"]),\"string\"!=typeof e)return!1;var r=t.tablet?i.test(e):n.test(e);return!r&&t.tablet&&t.featureDetect&&navigator&&navigator.maxTouchPoints>1&&-1!==e.indexOf(\"Macintosh\")&&-1!==e.indexOf(\"Safari\")&&(r=!0),r}},{}],239:[function(t,e,r){\"use strict\";e.exports=function(t){for(var e,r=t.length,n=0;n<r;n++)if(((e=t.charCodeAt(n))<9||e>13)&&32!==e&&133!==e&&160!==e&&5760!==e&&6158!==e&&(e<8192||e>8205)&&8232!==e&&8233!==e&&8239!==e&&8287!==e&&8288!==e&&12288!==e&&65279!==e)return!1;return!0}},{}],240:[function(t,e,r){e.exports=function(t,e,r){return t*(1-r)+e*r}},{}],241:[function(t,e,r){var 
n=t(\"./normalize\"),i=t(\"gl-mat4/create\"),a=t(\"gl-mat4/clone\"),o=t(\"gl-mat4/determinant\"),s=t(\"gl-mat4/invert\"),l=t(\"gl-mat4/transpose\"),c={length:t(\"gl-vec3/length\"),normalize:t(\"gl-vec3/normalize\"),dot:t(\"gl-vec3/dot\"),cross:t(\"gl-vec3/cross\")},u=i(),f=i(),h=[0,0,0,0],p=[[0,0,0],[0,0,0],[0,0,0]],d=[0,0,0];function g(t,e,r,n,i){t[0]=e[0]*n+r[0]*i,t[1]=e[1]*n+r[1]*i,t[2]=e[2]*n+r[2]*i}e.exports=function(t,e,r,i,m,v){if(e||(e=[0,0,0]),r||(r=[0,0,0]),i||(i=[0,0,0]),m||(m=[0,0,0,1]),v||(v=[0,0,0,1]),!n(u,t))return!1;if(a(f,u),f[3]=0,f[7]=0,f[11]=0,f[15]=1,Math.abs(o(f)<1e-8))return!1;var y,x,b,_,w,T,k,A=u[3],M=u[7],S=u[11],E=u[12],L=u[13],C=u[14],P=u[15];if(0!==A||0!==M||0!==S){if(h[0]=A,h[1]=M,h[2]=S,h[3]=P,!s(f,f))return!1;l(f,f),y=m,b=f,_=(x=h)[0],w=x[1],T=x[2],k=x[3],y[0]=b[0]*_+b[4]*w+b[8]*T+b[12]*k,y[1]=b[1]*_+b[5]*w+b[9]*T+b[13]*k,y[2]=b[2]*_+b[6]*w+b[10]*T+b[14]*k,y[3]=b[3]*_+b[7]*w+b[11]*T+b[15]*k}else m[0]=m[1]=m[2]=0,m[3]=1;if(e[0]=E,e[1]=L,e[2]=C,function(t,e){t[0][0]=e[0],t[0][1]=e[1],t[0][2]=e[2],t[1][0]=e[4],t[1][1]=e[5],t[1][2]=e[6],t[2][0]=e[8],t[2][1]=e[9],t[2][2]=e[10]}(p,u),r[0]=c.length(p[0]),c.normalize(p[0],p[0]),i[0]=c.dot(p[0],p[1]),g(p[1],p[1],p[0],1,-i[0]),r[1]=c.length(p[1]),c.normalize(p[1],p[1]),i[0]/=r[1],i[1]=c.dot(p[0],p[2]),g(p[2],p[2],p[0],1,-i[1]),i[2]=c.dot(p[1],p[2]),g(p[2],p[2],p[1],1,-i[2]),r[2]=c.length(p[2]),c.normalize(p[2],p[2]),i[1]/=r[2],i[2]/=r[2],c.cross(d,p[1],p[2]),c.dot(p[0],d)<0)for(var I=0;I<3;I++)r[I]*=-1,p[I][0]*=-1,p[I][1]*=-1,p[I][2]*=-1;return 
v[0]=.5*Math.sqrt(Math.max(1+p[0][0]-p[1][1]-p[2][2],0)),v[1]=.5*Math.sqrt(Math.max(1-p[0][0]+p[1][1]-p[2][2],0)),v[2]=.5*Math.sqrt(Math.max(1-p[0][0]-p[1][1]+p[2][2],0)),v[3]=.5*Math.sqrt(Math.max(1+p[0][0]+p[1][1]+p[2][2],0)),p[2][1]>p[1][2]&&(v[0]=-v[0]),p[0][2]>p[2][0]&&(v[1]=-v[1]),p[1][0]>p[0][1]&&(v[2]=-v[2]),!0}},{\"./normalize\":242,\"gl-mat4/clone\":92,\"gl-mat4/create\":93,\"gl-mat4/determinant\":94,\"gl-mat4/invert\":98,\"gl-mat4/transpose\":109,\"gl-vec3/cross\":157,\"gl-vec3/dot\":162,\"gl-vec3/length\":172,\"gl-vec3/normalize\":179}],242:[function(t,e,r){e.exports=function(t,e){var r=e[15];if(0===r)return!1;for(var n=1/r,i=0;i<16;i++)t[i]=e[i]*n;return!0}},{}],243:[function(t,e,r){var n=t(\"gl-vec3/lerp\"),i=t(\"mat4-recompose\"),a=t(\"mat4-decompose\"),o=t(\"gl-mat4/determinant\"),s=t(\"
"/*!\n",
" * pad-left <https://github.com/jonschlinkert/pad-left>\n",
" *\n",
" * Copyright (c) 2014-2015, Jon Schlinkert.\n",
" * Licensed under the MIT license.\n",
" */\n",
2022-02-14 13:34:42 +01:00
"\"use strict\";var n=t(\"repeat-string\");e.exports=function(t,e,r){return n(r=void 0!==r?r+\"\":\" \",e)+t}},{\"repeat-string\":277}],265:[function(t,e,r){e.exports=function(t,e){e||(e=[0,\"\"]),t=String(t);var r=parseFloat(t,10);return e[0]=r,e[1]=t.match(/[\\d.\\-\\+]*\\s*(.*)/)[1]||\"\",e}},{}],266:[function(t,e,r){\"use strict\";e.exports=function(t,e){for(var r=0|e.length,i=t.length,a=[new Array(r),new Array(r)],o=0;o<r;++o)a[0][o]=[],a[1][o]=[];for(o=0;o<i;++o){var s=t[o];a[0][s[0]].push(s),a[1][s[1]].push(s)}var l=[];for(o=0;o<r;++o)a[0][o].length+a[1][o].length===0&&l.push([o]);function c(t,e){var r=a[e][t[e]];r.splice(r.indexOf(t),1)}function u(t,r,i){for(var o,s,l,u=0;u<2;++u)if(a[u][r].length>0){o=a[u][r][0],l=u;break}s=o[1^l];for(var f=0;f<2;++f)for(var h=a[f][r],p=0;p<h.length;++p){var d=h[p],g=d[1^f];n(e[t],e[r],e[s],e[g])>0&&(o=d,s=g,l=f)}return i||o&&c(o,l),s}function f(t,r){var i=a[r][t][0],o=[t];c(i,r);for(var s=i[1^r];;){for(;s!==t;)o.push(s),s=u(o[o.length-2],s,!1);if(a[0][t].length+a[1][t].length===0)break;var l=o[o.length-1],f=t,h=o[1],p=u(l,f,!0);if(n(e[l],e[f],e[h],e[p])<0)break;o.push(t),s=u(l,f)}return o}function h(t,e){return e[1]===e[e.length-1]}for(o=0;o<r;++o)for(var p=0;p<2;++p){for(var d=[];a[p][o].length>0;){a[0][o].length;var g=f(o,p);h(0,g)?d.push.apply(d,g):(d.length>0&&l.push(d),d=g)}d.length>0&&l.push(d)}return l};var n=t(\"compare-angle\")},{\"compare-angle\":54}],267:[function(t,e,r){\"use strict\";e.exports=function(t,e){for(var r=n(t,e.length),i=new Array(e.length),a=new Array(e.length),o=[],s=0;s<e.length;++s){var l=r[s].length;a[s]=l,i[s]=!0,l<=1&&o.push(s)}for(;o.length>0;){var c=o.pop();i[c]=!1;var u=r[c];for(s=0;s<u.length;++s){var f=u[s];0==--a[f]&&o.push(f)}}var h=new Array(e.length),p=[];for(s=0;s<e.length;++s)if(i[s]){c=p.length;h[s]=c,p.push(e[s])}else h[s]=-1;var d=[];for(s=0;s<t.length;++s){var g=t[s];i[g[0]]&&i[g[1]]&&d.push([h[g[0]],h[g[1]]])}return[d,p]};var 
n=t(\"edges-to-adjacency-list\")},{\"edges-to-adjacency-list\":66}],268:[function(t,e,r){\"use strict\";e.exports=function(t,e){var r=c(t,e);t=r[0];for(var f=(e=r[1]).length,h=(t.length,n(t,e.length)),p=0;p<f;++p)if(h[p].length%2==1)throw new Error(\"planar-graph-to-polyline: graph must be manifold\");var d=i(t,e);var g=(d=d.filter((function(t){for(var r=t.length,n=[0],i=0;i<r;++i){var a=e[t[i]],l=e[t[(i+1)%r]],c=o(-a[0],a[1]),u=o(-a[0],l[1]),f=o(l[0],a[1]),h=o(l[0],l[1]);n=s(n,s(s(c,u),s(f,h)))}return n[n.length-1]>0}))).length,m=new Array(g),v=new Array(g);for(p=0;p<g;++p){m[p]=p;var y=new Array(g),x=d[p].map((function(t){return e[t]})),b=a([x]),_=0;t:for(var w=0;w<g;++w)if(y[w]=0,p!==w){for(var T=(H=d[w]).length,k=0;k<T;++k){var A=b(e[H[k]]);if(0!==A){A<0&&(y[w]=1,_+=1);continue t}}y[w]=1,_+=1}v[p]=[_,p,y]}v.sort((function(t,e){return e[0]-t[0]}));for(p=0;p<g;++p){var M=(y=v[p])[1],S=y[2];for(w=0;w<g;++w)S[w]&&(m[w]=M)}var E=function(t){for(var e=new Array(t),r=0;r<t;++r)e[r]=[];return e}(g);for(p=0;p<g;++p)E[p].push(m[p]),E[m[p]].push(p);var L={},C=u(f,!1);for(p=0;p<g;++p)for(T=(H=d[p]).length,w=0;w<T;++w){var P=H[w],I=H[(w+1)%T],O=Math.min(P,I)+\":\"+Math.max(P,I);if(O in L){var z=L[O];E[z].push(p),E[p].push(z),C[P]=C[I]=!0}else L[O]=p}function D(t){for(var e=t.length,r=0;r<e;++r)if(!C[t[r]])return!1;return!0}var R=[],F=u(g,-1);for(p=0;p<g;++p)m[p]!==p||D(d[p])?F[p]=-1:(R.push(p),F[p]=0);r=[];for(;R.length>0;){var B=R.pop(),N=E[B];l(N,(function(t,e){return t-e}));var j,U=N.length,V=F[B];if(0===V){var H=d[B];j=[H]}for(p=0;p<U;++p){var q=N[p];if(!(F[q]>=0))if(F[q]=1^V,R.push(q),0===V)D(H=d[q])||(H.reverse(),j.push(H))}0===V&&r.push(j)}return r};var n=t(\"edges-to-adjacency-list\"),i=t(\"planar-dual\"),a=t(\"point-in-big-polygon\"),o=t(\"two-product\"),s=t(\"robust-sum\"),l=t(\"uniq\"),c=t(\"./lib/trim-leaves\");function u(t,e){for(var r=new Array(t),n=0;n<t;++n)r[n]=e;return 
r}},{\"./lib/trim-leaves\":267,\"edges-to-adjacency-list\":66,\"planar-dual\":266,\"point-in-big-polygon\":269,\"robust-sum\":289,\"two-product\":306,uniq:310}],269:[function(t,e,r){e.exports=function(t){for(var e=t.length,r=[]
"/*!\n",
" * repeat-string <https://github.com/jonschlinkert/repeat-string>\n",
" *\n",
" * Copyright (c) 2014-2015, Jon Schlinkert.\n",
" * Licensed under the MIT License.\n",
" */\n",
2022-02-14 13:34:42 +01:00
"\"use strict\";var n,i=\"\";e.exports=function(t,e){if(\"string\"!=typeof t)throw new TypeError(\"expected a string\");if(1===e)return t;if(2===e)return t+t;var r=t.length*e;if(n!==t||void 0===n)n=t,i=\"\";else if(i.length>=r)return i.substr(0,r);for(;r>i.length&&e>1;)1&e&&(i+=t),e>>=1,t+=t;return i=(i+=t).substr(0,r)}},{}],278:[function(t,e,r){(function(t){(function(){e.exports=t.performance&&t.performance.now?function(){return performance.now()}:Date.now||function(){return+new Date}}).call(this)}).call(this,void 0!==n?n:\"undefined\"!=typeof self?self:\"undefined\"!=typeof window?window:{})},{}],279:[function(t,e,r){\"use strict\";e.exports=function(t){for(var e=t.length,r=t[t.length-1],n=e,i=e-2;i>=0;--i){var a=r,o=t[i];(l=o-((r=a+o)-a))&&(t[--n]=r,r=l)}var s=0;for(i=n;i<e;++i){var l;a=t[i];(l=(o=r)-((r=a+o)-a))&&(t[s++]=l)}return t[s++]=r,t.length=s,t}},{}],280:[function(t,e,r){\"use strict\";var n=t(\"two-product\"),i=t(\"robust-sum\"),a=t(\"robust-scale\"),o=t(\"robust-compress\");function s(t,e,r,n){return function(e){return n(t(r(e[0][0],e[1][1]),r(-e[0][1],e[1][0])))}}function l(t,e,r,n){return function(i){return n(t(e(t(r(i[1][1],i[2][2]),r(-i[1][2],i[2][1])),i[0][0]),t(e(t(r(i[1][0],i[2][2]),r(-i[1][2],i[2][0])),-i[0][1]),e(t(r(i[1][0],i[2][1]),r(-i[1][1],i[2][0])),i[0][2]))))}}function c(t){return(2===t?s:3===t?l:void 0)(i,a,n,o)}var u=[function(){return[0]},function(t){return[t[0][0]]}];function f(t,e,r,n,i,a){return function(o){switch(o.length){case 0:return t(o);case 1:return e(o);case 2:return r(o);case 3:return n(o)}var s=i[o.length];return s||(s=i[o.length]=a(o.length)),s(o)}}!function(){for(;u.length<4;)u.push(c(u.length));e.exports=f.apply(void 0,u.concat([u,c]));for(var t=0;t<u.length;++t)e.exports[t]=u[t]}()},{\"robust-compress\":279,\"robust-scale\":286,\"robust-sum\":289,\"two-product\":306}],281:[function(t,e,r){\"use strict\";var n=t(\"two-product\"),i=t(\"robust-sum\");e.exports=function(t,e){for(var 
r=n(t[0],e[0]),a=1;a<t.length;++a)r=i(r,n(t[a],e[a]));return r}},{\"robust-sum\":289,\"two-product\":306}],282:[function(t,e,r){\"use strict\";var n=t(\"two-product\"),i=t(\"robust-sum\"),a=t(\"robust-subtract\"),o=t(\"robust-scale\");function s(t){return(3===t?l:4===t?c:5===t?u:f)(i,a,n,o)}function l(t,e,r,n){return function(i,a,o){var s=r(i[0],i[0]),l=n(s,a[0]),c=n(s,o[0]),u=r(a[0],a[0]),f=n(u,i[0]),h=n(u,o[0]),p=r(o[0],o[0]),d=n(p,i[0]),g=n(p,a[0]),m=t(e(g,h),e(f,l)),v=e(d,c),y=e(m,v);return y[y.length-1]}}function c(t,e,r,n){return function(i,a,o,s){var l=t(r(i[0],i[0]),r(i[1],i[1])),c=n(l,a[0]),u=n(l,o[0]),f=n(l,s[0]),h=t(r(a[0],a[0]),r(a[1],a[1])),p=n(h,i[0]),d=n(h,o[0]),g=n(h,s[0]),m=t(r(o[0],o[0]),r(o[1],o[1])),v=n(m,i[0]),y=n(m,a[0]),x=n(m,s[0]),b=t(r(s[0],s[0]),r(s[1],s[1])),_=n(b,i[0]),w=n(b,a[0]),T=n(b,o[0]),k=t(t(n(e(T,x),a[1]),t(n(e(w,g),-o[1]),n(e(y,d),s[1]))),t(n(e(w,g),i[1]),t(n(e(_,f),-a[1]),n(e(p,c),s[1])))),A=t(t(n(e(T,x),i[1]),t(n(e(_,f),-o[1]),n(e(v,u),s[1]))),t(n(e(y,d),i[1]),t(n(e(v,u),-a[1]),n(e(p,c),o[1])))),M=e(k,A);return M[M.length-1]}}function u(t,e,r,n){return function(i,a,o,s,l){var 
c=t(r(i[0],i[0]),t(r(i[1],i[1]),r(i[2],i[2]))),u=n(c,a[0]),f=n(c,o[0]),h=n(c,s[0]),p=n(c,l[0]),d=t(r(a[0],a[0]),t(r(a[1],a[1]),r(a[2],a[2]))),g=n(d,i[0]),m=n(d,o[0]),v=n(d,s[0]),y=n(d,l[0]),x=t(r(o[0],o[0]),t(r(o[1],o[1]),r(o[2],o[2]))),b=n(x,i[0]),_=n(x,a[0]),w=n(x,s[0]),T=n(x,l[0]),k=t(r(s[0],s[0]),t(r(s[1],s[1]),r(s[2],s[2]))),A=n(k,i[0]),M=n(k,a[0]),S=n(k,o[0]),E=n(k,l[0]),L=t(r(l[0],l[0]),t(r(l[1],l[1]),r(l[2],l[2]))),C=n(L,i[0]),P=n(L,a[0]),I=n(L,o[0]),O=n(L,s[0]),z=t(t(t(n(t(n(e(O,E),o[1]),t(n(e(I,T),-s[1]),n(e(S,w),l[1]))),a[2]),t(n(t(n(e(O,E),a[1]),t(n(e(P,y),-s[1]),n(e(M,v),l[1]))),-o[2]),n(t(n(e(I,T),a[1]),t(n(e(P,y),-o[1]),n(e(_,m),l[1]))),s[2]))),t(n(t(n(e(S,w),a[1]),t(n(e(M,v),-o[1]),n(e(_,m),s[1]))),-l[2]),t(n(t(n(e(O,E),a[1]),t(n(e(P,y),-s[1]),n(e(M,v),l[1]))),i[2]),n(t(n(e(O,E),i[1]),t(n(e(C,p),-s[1]),n(e(A,h),l[1]))),-a[2])))),t(t(n(t(n(e(P,y),i[1]),t(n(e(C,p),-a[1]),n(e(g,u),l[1]))),s[2]),t(n(t(n(e(M,v),i[1]),t(n(e(A,h),-a[1]),n(e(g,u),s[1]))),-l[2]),n(t(n(e(S,
" });\n",
" require(['plotly'], function(Plotly) {\n",
" window._Plotly = Plotly;\n",
" });\n",
" }\n",
" </script>\n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"name": "re3data tier 1-digits",
"type": "bar",
"x": [
"4 Engineering Sciences",
"3 Natural Sciences",
"2 Life Sciences",
"1 Humanities and Social Sciences"
],
"y": [
2022-02-14 13:34:42 +01:00
546,
1398,
1542,
1023
]
},
{
"name": "re3data tier 2-digits",
"type": "bar",
"x": [
"45 Construction Engineering and Architecture",
"44 Computer Science, Electrical and System Engineering",
"43 Materials Science and Engineering",
"42 Thermal Engineering/Process Engineering",
"41 Mechanical and industrial Engineering",
"34 Geosciences (including Geography)",
"33 Mathematics",
"32 Physics",
"31 Chemistry",
"23 Agriculture, Forestry, Horticulture and Veterinary Medicine",
"22 Medicine",
"21 Biology",
"12 Social and Behavioural Sciences",
"11 Humanities"
],
"y": [
49,
2022-02-14 13:34:42 +01:00
156,
42,
19,
11,
2022-02-14 13:34:42 +01:00
792,
35,
318,
229,
202,
660,
924,
464,
329
]
},
{
"name": "re3data tier 3-digits",
"type": "bar",
"x": [
"410 Construction Engineering and Architecture",
"409 Computer Science",
"408 Electrical Engineering",
"407 Systems Engineering",
"406 Materials Science",
"405 Materials Engineering",
"404 Heat Energy Technology, Thermal Machines, Fluid Mechanics",
"403 Process Engineering, Technical Chemistry",
"402 Mechanics and Constructive Mechanical Engineering",
"318 Water Research",
"317 Geography",
"316 Geochemistry, Mineralogy and Crystallography",
"315 Geophysics and Geodesy",
"314 Geology and Palaeontology",
"313 Atmospheric Science and Oceanography",
"312 Mathematics",
"311 Astrophysics and Astronomy",
"310 Statistical Physics, Soft Matter, Biological Physics, Nonlinear Dynamics",
"309 Particles, Nuclei and Fields",
"308 Optics, Quantum Optics and Physics of Atoms, Molecules and Plasmas",
"307 Condensed Matter Physics",
"306 Polymer Research",
"305 Biological Chemistry and Food Chemistry",
"304 Analytical Chemistry, Method Development (Chemistry)",
"303 Physical and Theoretical Chemistry",
"302 Chemical Solid State and Surface Research",
"301 Molecular Chemistry",
"207 Agriculture, Forestry, Horticulture and Veterinary Medicine",
"206 Neurosciences",
"205 Medicine",
"204 Microbiology, Virology and Immunology",
"203 Zoology",
"202 Plant Sciences",
"201 Basic Biological and Medical Research",
"113 Jurisprudence",
"112 Economics",
"111 Social Sciences",
"110 Psychology",
"109 Education Sciences",
"108 Philosophy",
"107 Theology",
"106 Non-European Languages and Cultures, Social and Cultural Anthropology, Jewish Studies and Religious Studies",
"105 Literary Studies",
"104 Linguistics",
"103 Fine Arts, Music, Theatre and Media Studies",
"102 History",
"101 Ancient Cultures"
],
"y": [
41,
2022-02-14 13:34:42 +01:00
94,
16,
24,
2022-02-14 13:34:42 +01:00
26,
9,
9,
10,
6,
2022-02-14 13:34:42 +01:00
150,
164,
93,
269,
83,
415,
7,
179,
5,
51,
75,
2022-02-14 13:34:42 +01:00
26,
4,
22,
22,
33,
22,
44,
2022-02-14 13:34:42 +01:00
134,
93,
412,
224,
254,
219,
536,
55,
219,
301,
42,
67,
10,
22,
2022-02-14 13:34:42 +01:00
47,
30,
110,
76,
2022-02-14 13:34:42 +01:00
113,
71
]
},
{
"name": "re3data tier 5-digits",
"type": "bar",
"x": [
"41006 Geotechnics, Hydraulic Engineering",
"41004 Sructural Engineering, Building Informatics, Construction Operation",
"41002 Urbanism, Spatial Planning, Transportation and Infrastructure Planning, Landscape Planning",
"41001 Architecture, Building and Construction History, Sustainable Building Technology, Building Design",
"40904 Artificial Intelligence, Image and Language Processing",
"40903 Operating, Communication and Information Systems",
"40902 Software Technology",
"40901 Theoretical Computer Science",
"40803 Electrical Energy Generation, Distribution, Application",
"40802 Communication, High-Frequency and Network Technology, Theoretical Electrical Engineering",
"40801 Electronic Semiconductors, Components, Circuits, Systems",
"40705 Human Factors, Ergonomics, Human-Machine Systems",
"40704 Traffic and Transport Systems, Logistics",
"40702 Measurement Systems",
"40701 Automation, Control Systems, Robotics, Mechatronics",
"40605 Biomaterials",
"40603 Microstructural Mechanical Properties of Materials",
"40601 Thermodynamics and Kinetics of Materials",
"40503 Composite Materials",
"40502 Sintered Metallic and Ceramic Materials",
"40501 Metallurgical and Thermal Processes, Thermomechanical Treatment of Materials",
"40402 Technical Thermodynamics",
"40401 Energy Process Engineering",
"40304 Biological Process Engineering",
"40302 Technical Chemistry",
"40301 Chemical and Thermal Process Engineering",
"40204 Acoustics",
"31801 Hydrogeology, Hydrology, Limnology, Urban Water Management, Water Chemistry, Integrated Water Resources Management",
"31702 Human Geography",
"31701 Physical Geography",
"31601 Geochemistry, Mineralogy and Crystallography",
"31502 Geodesy, Photogrammetry, Remote Sensing, Geoinformatics, Cartogaphy",
"31501 Geophysics",
"31401 Geology and Palaeontology",
"31302 Oceanography",
"31301 Atmospheric Science",
"31201 Mathematics",
"31101 Astrophysics and Astronomy",
"31001 Statistical Physics, Soft Matter, Biological Physics, Nonlinear Dynamics",
"30901 Particles, Nuclei and Fields",
"30801 Optics, Quantum Optics, Atoms, Molecules, Plasmas",
"30702 Theoretical Condensed Matter Physics",
"30701 Experimental Condensed Matter Physics",
"30603 Polymer Materials",
"30602 Experimental and Theoretical Physics of Polymers",
"30601 Preparatory and Physical Chemistry of Polymers",
"30502 Food Chemistry",
"30501 Biological and Biomimetic Chemistry",
"30401 Analytical Chemistry, Method Development (Chemistry)",
"30302 General Theoretical Chemistry",
"30301 Physical Chemistry of Molecules, Interfaces and Liquids - Spectroscopy, Kinetics",
"30203 Theory and Modelling",
"30202 Physical Chemistry of Solids and Surfaces, Material Characterisation",
"30201 Solid State and Surface Chemistry, Material Synthesis",
"30102 Organic Molecular Chemistry",
"30101 Inorganic Molecular Chemistry",
"20714 Basic Research on Pathogenesis, Diagnostics and Therapy and Clinical Veterinary Medicine",
"20713 Basic Veterinary Medical Science",
"20711 Animal Husbandry, Breeding and Hygiene",
"20710 Basic Forest Research",
"20709 Inventory Control and Use of Forest Resources",
"20708 Agricultural Economics and Sociology",
"20707 Agricultural and Food Process Engineering",
"20705 Plant Breeding",
"20704 Ecology of Agricultural Landscapes",
"20703 Plant Nutrition",
"20702 Plant Cultivation",
"20701 Soil Sciences",
"20611 Clinical Neurosciences III - Ophthalmology",
"20609 Biological Psychiatry",
"20608 Clinical Neurosciences I - Neurology, Neurosurgery",
"20606 Cognitive Neuroscience and Neuroimaging",
"20605 Comparative Neurobiology",
"20604 Systemic Neuroscience, Computational Neuroscience, Behaviour",
"20603 Developmental Neurobiology",
"20602 Cellular Neuroscience",
"20601 Molecular Neuroscience and Neurogenetics",
"20532 Biomedical Technology and Medical Physics",
"20531 Radiation Oncology and Radiobiology",
"20530 Radiology and Nuclear Medicine",
"20528 Dentistry, Oral Surgery",
"20527 Traumatology and Orthopaedics",
"20526 Cardiothoracic Surgery",
"20524 Gerontology and Geriatric Medicine",
"20523 Urology",
"20522 Reproductive Medicine/Biology",
"20521 Gynaecology and Obstetrics",
"20520 Pediatric and Adolescent Medicine",
"20519 Dermatology",
"20518 Rheumatology, Clinical Immunology, Allergology",
"20517 Endocrinology, Diabetology",
"20515 Gastroenterology, Metabolism",
"20514 Hematology, Oncology, Transfusion Medicine",
"20513 Pneumology, Clinical Infectiology Intensive Care Medicine",
"20512 Cardiology, Angiology",
"20510 Toxicology and Occupational Medicine",
"20509 Pharmacology",
"20508 Pharmacy",
"20507 Clinical Chemistry and Pathobiochemistry",
"20506 Pathology and Forensic Medicine",
"20505 Nutritional Sciences",
"20504 Physiology",
"20503 Human Genetics",
"20502 Public Health, Health Services Research, Social Medicine",
"20501 Epidemiology, Medical Biometry, Medical Informatics",
"20405 Immunology",
"20404 Virology",
"20403 Medical Microbiology, Molecular Infection Biology",
"20402 Microbial Ecology and Applied Microbiology",
"20401 Metabolism, Biochemistry and Genetics of Microorganisms",
"20306 Animal Genetics, Cell and Developmental Biology",
"20305 Biochemistry and Animal Physiology",
"20304 Sensory and Behavioural Biology",
"20303 Animal Ecology, Biodiversity and Ecosystem Research",
"20302 Evolution, Anthropology",
"20301 Systematics and Morphology",
"20207 Plant Genetics",
"20206 Plant Cell and Developmental Biology",
"20205 Plant Biochemistry and Biophysics",
"20204 Plant Physiology",
"20203 Inter-organismic Interactions of Plants",
"20202 Plant Ecology and Ecosystem Analysis",
"20201 Plant Systematics and Evolution",
"20108 Anatomy",
"20107 Bioinformatics and Theoretical Biology",
"20106 Developmental Biology",
"20105 General Genetics",
"20104 Structural Biology",
"20103 Cell Biology",
"20102 Biophysics",
"20101 Biochemistry",
"11305 Criminology",
"11304 Criminal Law and Law of Criminal Procedure",
"11303 Public Law",
"11302 Private Law",
"11301 Legal and Political Philosophy, Legal History, Legal Theory",
"11206 Economic and Social History",
"11205 Statistics and Econometrics",
"11204 Business Administration",
"11203 Public Finance",
"11202 Economic and Social Policy",
"11201 Economic Theory",
"11104 Political Science",
"11103 Communication Science",
"11102 Empirical Social Research",
"11101 Sociological Theory",
"11004 Differential Psychology, Clinical Psychology, Medical Psychology, Methodology",
"11003 Social Psychology, Industrial and Organisational Psychology",
"11002 Developmental and Educational Psychology",
"11001 General, Biological and Mathematical Psychology",
"10903 Research on Socialization and Educational Institutions and Professions",
"10902 Research on Teaching, Learning and Training",
"10901 General Education and History of Education",
"10801 History of Philosophy",
"10702 Roman Catholic Theology",
"10701 Protestant Theology",
"10605 Religious Studies and Jewish Studies",
"10604 Islamic Studies, Arabian Studies, Semitic Studies",
"10603 African, American and Oceania Studies",
"10602 Asian Studies",
"10601 Social and Cultural Anthropology and Ethnology/Folklore",
"10504 General and Comparative Literature and Cultural Studies",
"10503 European and American Literature",
"10501 Medieval German Literature",
"10403 Typology, Non-European Languages, Historical Linguistics",
"10402 Individual Linguistics",
"10401 General and Applied Linguistics",
"10303 Theatre and Media Studies",
"10302 Musicology",
"10301 Art History",
"10204 History of Science",
"10203 Modern and Current History",
"10202 Early Modern History",
"10201 Medieval History",
"10105 Egyptology and Ancient Near Eastern Studies",
"10104 Classical Archaeology",
"10103 Ancient History",
"10102 Classical Philology",
"10101 Prehistory"
],
"y": [
3,
2022-02-14 13:34:42 +01:00
3,
16,
2022-02-14 13:34:42 +01:00
16,
25,
4,
2022-02-14 13:34:42 +01:00
11,
2021-07-22 11:03:05 +02:00
1,
7,
3,
2021-07-22 11:03:05 +02:00
1,
5,
13,
2021-07-22 11:03:05 +02:00
1,
2,
2,
2021-07-22 11:03:05 +02:00
1,
2,
2,
2,
2021-07-22 11:03:05 +02:00
1,
1,
3,
2,
2021-07-22 11:03:05 +02:00
1,
1,
6,
50,
27,
23,
19,
2022-02-14 13:34:42 +01:00
101,
66,
17,
2022-02-14 13:34:42 +01:00
174,
133,
2021-07-22 11:03:05 +02:00
1,
30,
2021-07-22 11:03:05 +02:00
1,
11,
21,
2021-07-22 11:03:05 +02:00
1,
3,
2,
2021-07-22 11:03:05 +02:00
1,
2,
3,
5,
4,
4,
23,
6,
8,
5,
9,
6,
4,
2022-02-14 13:34:42 +01:00
8,
4,
23,
2022-02-14 13:34:42 +01:00
14,
18,
7,
9,
32,
3,
8,
35,
3,
2022-02-14 13:34:42 +01:00
2,
2021-07-22 11:03:05 +02:00
1,
17,
2021-07-22 11:03:05 +02:00
1,
9,
2021-07-22 11:03:05 +02:00
1,
2022-02-14 13:34:42 +01:00
2,
6,
17,
2022-02-14 13:34:42 +01:00
6,
9,
2022-02-14 13:34:42 +01:00
3,
2021-07-22 11:03:05 +02:00
1,
1,
3,
2021-07-22 11:03:05 +02:00
1,
2,
5,
10,
2,
3,
4,
7,
16,
5,
2,
12,
25,
13,
4,
7,
4,
11,
2022-02-14 13:34:42 +01:00
118,
137,
60,
22,
19,
17,
2022-02-14 13:34:42 +01:00
10,
40,
100,
10,
5,
2022-02-14 13:34:42 +01:00
85,
22,
14,
2022-02-14 13:34:42 +01:00
68,
4,
2022-02-14 13:34:42 +01:00
10,
4,
2021-07-22 11:03:05 +02:00
1,
2022-02-14 13:34:42 +01:00
70,
16,
13,
2022-02-14 13:34:42 +01:00
179,
4,
2022-02-14 13:34:42 +01:00
229,
46,
86,
11,
2022-02-14 13:34:42 +01:00
74,
7,
2,
8,
2021-07-22 11:03:05 +02:00
1,
9,
2022-02-14 13:34:42 +01:00
13,
65,
16,
15,
2022-02-14 13:34:42 +01:00
53,
2,
2022-02-14 13:34:42 +01:00
61,
16,
108,
3,
4,
2022-02-14 13:34:42 +01:00
4,
3,
2,
2022-02-14 13:34:42 +01:00
10,
10,
3,
2,
3,
3,
9,
4,
2022-02-14 13:34:42 +01:00
6,
3,
2022-02-14 13:34:42 +01:00
20,
2,
3,
2021-07-22 11:03:05 +02:00
1,
8,
4,
4,
5,
18,
20,
10,
18,
7,
8,
7,
2022-02-14 13:34:42 +01:00
16,
6,
2021-07-22 11:03:05 +02:00
1,
7
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
2022-02-14 13:34:42 +01:00
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Subject coverage re3data"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2022-03-17 10:33:11 +01:00
"<div> <div id=\"c8b04df0-2c32-46dc-8744-8d29cae776d5\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"c8b04df0-2c32-46dc-8744-8d29cae776d5\")) { Plotly.newPlot( \"c8b04df0-2c32-46dc-8744-8d29cae776d5\", [{\"name\":\"re3data tier 1-digits\",\"x\":[\"4 Engineering Sciences\",\"3 Natural Sciences\",\"2 Life Sciences\",\"1 Humanities and Social Sciences\"],\"y\":[546,1398,1542,1023],\"type\":\"bar\"},{\"name\":\"re3data tier 2-digits\",\"x\":[\"45 Construction Engineering and Architecture\",\"44 Computer Science, Electrical and System Engineering\",\"43 Materials Science and Engineering\",\"42 Thermal Engineering/Process Engineering\",\"41 Mechanical and industrial Engineering\",\"34 Geosciences (including Geography)\",\"33 Mathematics\",\"32 Physics\",\"31 Chemistry\",\"23 Agriculture, Forestry, Horticulture and Veterinary Medicine\",\"22 Medicine\",\"21 Biology\",\"12 Social and Behavioural Sciences\",\"11 Humanities\"],\"y\":[49,156,42,19,11,792,35,318,229,202,660,924,464,329],\"type\":\"bar\"},{\"name\":\"re3data tier 3-digits\",\"x\":[\"410 Construction Engineering and Architecture\",\"409 Computer Science\",\"408 Electrical Engineering\",\"407 Systems Engineering\",\"406 Materials Science\",\"405 Materials Engineering\",\"404 Heat Energy Technology, Thermal Machines, Fluid Mechanics\",\"403 Process Engineering, Technical Chemistry\",\"402 Mechanics and Constructive Mechanical Engineering\",\"318 Water Research\",\"317 Geography\",\"316 Geochemistry, Mineralogy and Crystallography\",\"315 Geophysics and Geodesy\",\"314 Geology and Palaeontology\",\"313 Atmospheric Science and Oceanography\",\"312 Mathematics\",\"311 Astrophysics and Astronomy\",\"310 Statistical Physics, Soft Matter, Biological Physics, Nonlinear Dynamics\",\"309 Particles, Nuclei and Fields\",\"308 Optics, Quantum Optics and 
Physics of Atoms, Molecules and Plasmas\",\"307 Condensed Matter Physics\",\"306 Polymer Research\",\"305 Biological Chemistry and Food Chemistry\",\"304 Analytical Chemistry, Method Development (Chemistry)\",\"303 Physical and Theoretical Chemistry\",\"302 Chemical Solid State and Surface Research\",\"301 Molecular Chemistry\",\"207 Agriculture, Forestry, Horticulture and Veterinary Medicine\",\"206 Neurosciences\",\"205 Medicine\",\"204 Microbiology, Virology and Immunology\",\"203 Zoology\",\"202 Plant Sciences\",\"201 Basic Biological and Medical Research\",\"113 Jurisprudence\",\"112 Economics\",\"111 Social Sciences\",\"110 Psychology\",\"109 Education Sciences\",\"108 Philosophy\",\"107 Theology\",\"106 Non-European Languages and Cultures, Social and Cultural Anthropology, Jewish Studies and Religious Studies\",\"105 Literary Studies\",\"104 Linguistics\",\"103 Fine Arts, Music, Theatre and Media Studies\",\"102 History\",\"101 Ancient Cultures\"],\"y\":[41,94,16,24,26,9,9,10,6,150,164,93,269,83,415,7,179,5,51,75,26,4,22,22,33,22,44,134,93,412,224,254,219,536,55,219,301,42,67,10,22,47,30,110,76,113,71],\"type\":\"bar\"},{\"name\":\"re3data tier 5-digits\",\"x\":[\"41006 Geotechnics, Hydraulic Engineering\",\"41004 Sructural Engineering, Building Informatics, Construction Operation\",\"41002 Urbanism, Spatial Planning, Transportation and Infrastructure Planning, Landscape Planning\",\"41001 Architecture, Building and Construction History, Sustainable Building Technology, Building Design\",\"40904 Artificial Intelligence, Image and Language Processing\",\"40903 Operating, Communication and Information Systems\",\"40902 Software Technology\",\"40901 Theoretical Computer Science\",\"40803 Electrical Energy Generation, Distribution, Application\",\"40802 Communication, High-Frequency and Network Technology, Theoretical Electrical Engineering\",\"40801 Electronic Semiconductors, Componen
" \n",
2022-03-17 10:33:11 +01:00
"var gd = document.getElementById('c8b04df0-2c32-46dc-8744-8d29cae776d5');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count non-null orgIdentifier values per subject label, ordered by label (descending).\n",
"data = re3data_subjects.groupby('subject')[['orgIdentifier']].count().sort_values('subject', ascending=False)\n",
"\n",
"# Subject labels begin with a numeric code followed by whitespace; one bar trace is\n",
"# built per code length (1, 2, 3 and 5 digits) so each tier can be toggled separately.\n",
"plot = []\n",
"for tier in [1, 2, 3, 5]:\n",
"    # Raw string so the regex escapes are not parsed as (invalid) string escapes.\n",
"    # The anchored pattern matches labels whose leading code has exactly `tier` digits.\n",
"    tier_rows = data[data.index.str.contains(r'^\\d{%s}\\s' % tier, regex=True)]\n",
"    plot.append(\n",
"        go.Bar(\n",
"            x=tier_rows.index,\n",
"            y=tier_rows['orgIdentifier'],\n",
"            name='re3data tier %s-digits' % tier\n",
"        )\n",
"    )\n",
"\n",
"layout = go.Layout(\n",
"    title='Subject coverage re3data',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"# Keep the Figure in fig; show() returns None, so chaining it onto the constructor would bind None.\n",
"fig = go.Figure(plot, layout)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**OpenDOAR**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Give every element of the list-like 'repository_metadata.content_subjects' entries its own row.\n",
"opendoar_subjects = opendoar_df.explode(column='repository_metadata.content_subjects')"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"type": "bar",
"x": [
2022-02-14 13:34:42 +01:00
"social sciences",
"science",
"humanities",
"technology",
"health and medicine",
2022-02-14 13:34:42 +01:00
"arts",
"engineering",
"mathematics"
],
"y": [
2022-02-14 13:34:42 +01:00
4360,
4281,
4069,
3951,
3926,
3849,
3565,
3541
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
2022-02-14 13:34:42 +01:00
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Subject coverage OpenDOAR"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2022-03-17 10:33:11 +01:00
"<div> <div id=\"fb0d0556-6124-430d-8548-46eaeb3e8ab8\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"fb0d0556-6124-430d-8548-46eaeb3e8ab8\")) { Plotly.newPlot( \"fb0d0556-6124-430d-8548-46eaeb3e8ab8\", [{\"x\":[\"social sciences\",\"science\",\"humanities\",\"technology\",\"health and medicine\",\"arts\",\"engineering\",\"mathematics\"],\"y\":[4360,4281,4069,3951,3926,3849,3565,3541],\"type\":\"bar\"}], {\"title\":{\"text\":\"Subject coverage OpenDOAR\"},\"xaxis\":{\"tickangle\":45,\"tickfont\":{\"size\":12}},\"template\":{\"data\":{\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"choropleth\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"choropleth\"}],\"contour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"contour\"}],\"contourcarpet\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\
"},\"type\":\"contourcarpet\"}],\"heatmap\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmap\"}],\"heatmapgl\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmapgl\"}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"histogram2d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2d\"}],\"histogram2dcontour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2dcontour\"}],\"mesh3d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"mesh3d\"}],\"parcoor
" \n",
2022-03-17 10:33:11 +01:00
"var gd = document.getElementById('fb0d0556-6124-430d-8548-46eaeb3e8ab8');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count non-null system_metadata.id values per subject, most frequent subject first.\n",
"data = opendoar_subjects.groupby('repository_metadata.content_subjects')[['system_metadata.id']].count().sort_values('system_metadata.id', ascending=False)\n",
"plot = [\n",
"    go.Bar(\n",
"        x=data.index,\n",
"        y=data['system_metadata.id'],\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Subject coverage OpenDOAR',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"# Keep the Figure in fig; show() returns None, so chaining it onto the constructor would bind None.\n",
"fig = go.Figure(plot, layout)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ROAR**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Give every element of the list-like 'subjects' entries its own row.\n",
"roar_subjects = roar_df.explode(column='subjects')"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"type": "bar",
"x": [
"L1",
"H1",
"Q1",
"K1",
2022-02-14 13:34:42 +01:00
"T1",
"R1",
2022-02-14 13:34:42 +01:00
"S1",
"HB",
"TA",
"B1",
"AC",
"JA",
2022-02-14 13:34:42 +01:00
"QD",
"Z665",
"AS",
"HG",
"QC",
"BF",
"AI",
2022-02-14 13:34:42 +01:00
"TJ",
"TK",
2022-02-14 13:34:42 +01:00
"QA",
"HM",
"Z719",
"P1",
"G1",
2022-02-14 13:34:42 +01:00
"HA",
"D1",
2022-02-14 13:34:42 +01:00
"TD",
"SB",
"TP",
"HN",
2022-02-14 13:34:42 +01:00
"RT",
"QA75",
2022-02-14 13:34:42 +01:00
"QK",
"BL",
2022-02-14 13:34:42 +01:00
"SF",
"QA76",
"HJ",
"LB",
2022-02-14 13:34:42 +01:00
"QE",
"HC",
2022-02-14 13:34:42 +01:00
"GE",
"SH",
"TH",
"QR",
"AZ",
2022-02-14 13:34:42 +01:00
"SD",
"HF",
2022-02-14 13:34:42 +01:00
"QL",
"N1",
"ZA",
"ZA4050",
"RS",
"LB2300",
"NX",
2022-02-14 13:34:42 +01:00
"HD",
"RK",
2022-02-14 13:34:42 +01:00
"RA",
"TC",
2022-02-14 13:34:42 +01:00
"HE",
"PE",
2022-02-14 13:34:42 +01:00
"RZ",
"M1",
"Z004",
2022-02-14 13:34:42 +01:00
"HF5601",
"GN",
"PN",
2022-02-14 13:34:42 +01:00
"J1",
"RB",
"TG",
"JZ",
"AM",
2022-02-14 13:34:42 +01:00
"BJ",
"HD28",
"JF",
"HT",
"TL",
"QP",
2022-02-14 13:34:42 +01:00
"RM",
"QH301",
"JC",
"RA0421",
2022-02-14 13:34:42 +01:00
"RD",
"RG",
"QH",
"QB",
"TN",
2022-02-14 13:34:42 +01:00
"C1",
"LC",
"HV",
2022-02-14 13:34:42 +01:00
"RC",
"ZA4450",
"TS",
"QM",
"JX",
2022-02-14 13:34:42 +01:00
"BP",
"TX",
"RJ",
"HQ",
"RE",
2022-02-14 13:34:42 +01:00
"CC",
"JS",
"E11",
"QR355",
"TF",
2022-02-14 13:34:42 +01:00
"TE",
"QR180",
"F1201",
2022-02-14 13:34:42 +01:00
"LA",
"RF",
2022-02-14 13:34:42 +01:00
"BC",
"RL",
"LB2361",
"NC",
2022-02-14 13:34:42 +01:00
"HD61",
"BR",
2022-02-14 13:34:42 +01:00
"ML",
"T201",
"ND",
"GF",
"LG",
2022-02-14 13:34:42 +01:00
"DS",
"GC",
"LB1501",
2022-02-14 13:34:42 +01:00
"LF",
"LT",
"GR",
2022-02-14 13:34:42 +01:00
"MT",
"NE",
"QH426",
"BD",
2022-02-14 13:34:42 +01:00
"CT",
"PR",
"GA",
"LB1603",
"LE",
2022-02-14 13:34:42 +01:00
"CB",
"KZ",
"BH",
"HX",
"GB",
2022-02-14 13:34:42 +01:00
"NB",
"BT",
"U1",
"RX",
2022-02-14 13:34:42 +01:00
"BV",
"RC0254",
"NK",
"RC0321",
2022-02-14 13:34:42 +01:00
"RV",
"PL",
2022-02-14 13:34:42 +01:00
"RJ101",
"SK",
"D051",
"DP",
2022-02-14 13:34:42 +01:00
"BS",
"PA",
"TR",
"LC5201",
2022-02-14 13:34:42 +01:00
"RC1200",
"DK",
2022-02-14 13:34:42 +01:00
"JQ",
"TT",
"D901",
2022-02-14 13:34:42 +01:00
"D111",
"GT",
"RA1001",
"JV",
"PC",
"D204",
2022-02-14 13:34:42 +01:00
"DE",
"PB",
2022-02-14 13:34:42 +01:00
"DR",
"F001",
"PD",
2022-02-14 13:34:42 +01:00
"D880",
"V1",
2022-02-14 13:34:42 +01:00
"CD",
"HS",
2022-02-14 13:34:42 +01:00
"PG",
"BM",
"GV",
"JN",
"PS",
"D839",
"PQ",
2022-02-14 13:34:42 +01:00
"BV1460",
"DT",
2022-02-14 13:34:42 +01:00
"D731",
"D501",
"BX",
"BQ",
"PI",
2022-02-14 13:34:42 +01:00
"VM",
"E151",
"CD921",
"DC",
2022-02-14 13:34:42 +01:00
"LD",
"PF",
"DF",
2022-02-14 13:34:42 +01:00
"CN",
"CS",
"JL",
"JK",
"DD",
2022-02-14 13:34:42 +01:00
"D890",
"DA",
"PT",
"PZ",
"DAW",
"F1001",
"PN0441",
2022-02-14 13:34:42 +01:00
"PN0080",
"CJ",
"DG",
"CR",
2022-02-14 13:34:42 +01:00
"KD",
"DL",
"DH",
"PJ",
2022-02-14 13:34:42 +01:00
"CE",
"DU",
"KF",
"DB",
"DJ",
"PN2000",
"PN1990",
2022-02-14 13:34:42 +01:00
"PK",
"DJK",
"PH",
"PN1993",
"JN101",
2022-02-14 13:34:42 +01:00
"JN1187",
"PM",
"PB1501",
"KDC"
],
"y": [
2022-02-14 13:34:42 +01:00
362,
354,
237,
224,
224,
193,
192,
169,
165,
146,
138,
135,
132,
128,
2022-02-14 13:34:42 +01:00
120,
119,
116,
2022-02-14 13:34:42 +01:00
113,
113,
109,
108,
108,
103,
93,
2022-02-14 13:34:42 +01:00
87,
87,
86,
86,
85,
85,
83,
82,
2022-02-14 13:34:42 +01:00
82,
81,
81,
81,
79,
78,
77,
2022-02-14 13:34:42 +01:00
77,
74,
73,
2022-02-14 13:34:42 +01:00
72,
72,
70,
68,
2022-02-14 13:34:42 +01:00
66,
64,
64,
64,
62,
60,
59,
58,
57,
2022-02-14 13:34:42 +01:00
56,
55,
54,
2022-02-14 13:34:42 +01:00
54,
53,
53,
52,
52,
52,
51,
50,
49,
2022-02-14 13:34:42 +01:00
48,
48,
48,
46,
46,
46,
46,
45,
45,
45,
44,
44,
2022-02-14 13:34:42 +01:00
44,
44,
43,
43,
43,
43,
42,
42,
41,
41,
41,
40,
40,
39,
39,
39,
39,
38,
38,
37,
37,
36,
36,
35,
35,
34,
34,
34,
33,
33,
2022-02-14 13:34:42 +01:00
33,
32,
32,
32,
32,
31,
31,
31,
31,
31,
30,
2022-02-14 13:34:42 +01:00
30,
30,
30,
29,
29,
29,
29,
29,
29,
28,
28,
28,
2022-02-14 13:34:42 +01:00
28,
28,
27,
27,
27,
27,
27,
26,
26,
26,
26,
25,
25,
24,
24,
23,
23,
23,
2022-02-14 13:34:42 +01:00
23,
23,
22,
22,
22,
22,
21,
21,
21,
21,
21,
21,
20,
20,
20,
2022-02-14 13:34:42 +01:00
19,
19,
19,
19,
19,
19,
18,
18,
18,
17,
17,
17,
2022-02-14 13:34:42 +01:00
17,
16,
15,
15,
15,
15,
15,
2022-02-14 13:34:42 +01:00
15,
14,
14,
14,
14,
14,
14,
14,
14,
14,
14,
13,
13,
13,
13,
13,
13,
13,
13,
13,
12,
12,
12,
12,
12,
12,
11,
11,
11,
11,
11,
11,
11,
10,
10,
10,
10,
10,
10,
10,
10,
9,
9,
9,
8,
8,
8,
7,
7,
7,
7,
6,
6
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
2022-02-14 13:34:42 +01:00
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Subject coverage OpenDOAR"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2022-03-17 10:33:11 +01:00
"<div> <div id=\"a053a269-00b7-49be-bb2a-ace3fb5c6678\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"a053a269-00b7-49be-bb2a-ace3fb5c6678\")) { Plotly.newPlot( \"a053a269-00b7-49be-bb2a-ace3fb5c6678\", [{\"x\":[\"L1\",\"H1\",\"Q1\",\"K1\",\"T1\",\"R1\",\"S1\",\"HB\",\"TA\",\"B1\",\"AC\",\"JA\",\"QD\",\"Z665\",\"AS\",\"HG\",\"QC\",\"BF\",\"AI\",\"TJ\",\"TK\",\"QA\",\"HM\",\"Z719\",\"P1\",\"G1\",\"HA\",\"D1\",\"TD\",\"SB\",\"TP\",\"HN\",\"RT\",\"QA75\",\"QK\",\"BL\",\"SF\",\"QA76\",\"HJ\",\"LB\",\"QE\",\"HC\",\"GE\",\"SH\",\"TH\",\"QR\",\"AZ\",\"SD\",\"HF\",\"QL\",\"N1\",\"ZA\",\"ZA4050\",\"RS\",\"LB2300\",\"NX\",\"HD\",\"RK\",\"RA\",\"TC\",\"HE\",\"PE\",\"RZ\",\"M1\",\"Z004\",\"HF5601\",\"GN\",\"PN\",\"J1\",\"RB\",\"TG\",\"JZ\",\"AM\",\"BJ\",\"HD28\",\"JF\",\"HT\",\"TL\",\"QP\",\"RM\",\"QH301\",\"JC\",\"RA0421\",\"RD\",\"RG\",\"QH\",\"QB\",\"TN\",\"C1\",\"LC\",\"HV\",\"RC\",\"ZA4450\",\"TS\",\"QM\",\"JX\",\"BP\",\"TX\",\"RJ\",\"HQ\",\"RE\",\"CC\",\"JS\",\"E11\",\"QR355\",\"TF\",\"TE\",\"QR180\",\"F1201\",\"LA\",\"RF\",\"BC\",\"RL\",\"LB2361\",\"NC\",\"HD61\",\"BR\",\"ML\",\"T201\",\"ND\",\"GF\",\"LG\",\"DS\",\"GC\",\"LB1501\",\"LF\",\"LT\",\"GR\",\"MT\",\"NE\",\"QH426\",\"BD\",\"CT\",\"PR\",\"GA\",\"LB1603\",\"LE\",\"CB\",\"KZ\",\"BH\",\"HX\",\"GB\",\"NB\",\"BT\",\"U1\",\"RX\",\"BV\",\"RC0254\",\"NK\",\"RC0321\",\"RV\",\"PL\",\"RJ101\",\"SK\",\"D051\",\"DP\",\"BS\",\"PA\",\"TR\",\"LC5201\",\"RC1200\",\"DK\",\"JQ\",\"TT\",\"D901\",\"D111\",\"GT\",\"RA1001\",\"JV\",\"PC\",\"D204\",\"DE\",\"PB\",\"DR\",\"F001\",\"PD\",\"D880\",\"V1\",\"CD\",\"HS\",\"PG\",\"BM\",\"GV\",\"JN\",\"PS\",\"D839\",\"PQ\",\"BV1460\",\"DT\",\"D731\",\"D501\",\"BX\",\"BQ\",\"PI\",\"VM\",\"E151\",\"CD921\",\"DC\",\"LD\",\"PF\",\"DF\",\"CN\",\"CS\",\"JL\",\"JK\",\"DD\",\"D890\",\"DA\",\"PT\",\"PZ\",\"DAW\",\"F1001\",\"PN0441\",\"P
N0080\",\"CJ\",\"DG\",\"CR\",\"KD\",\"DL\",\"DH\",\"PJ\",\"CE\",\"DU\",\"KF\",\"DB\",\"DJ\",\"PN2000\",\"PN1990\",\"PK\",\"DJK\",\"PH\",\"PN1993\",\"JN101\",\"JN1187\",\"PM\",\"PB1501\",\"KDC\"],\"y\":[362,354,237,224,224,193,192,169,165,146,138,135,132,128,120,119,116,113,113,109,108,108,103,93,87,87,86,86,85,85,83,82,82,81,81,81,79,78,77,77,74,73,72,72,70,68,66,64,64,64,62,60,59,58,57,56,55,54,54,53,53,52,52,52,51,50,49,48,48,48,46,46,46,46,45,45,45,44,44,44,44,43,43,43,43,42,42,41,41,41,40,40,39,39,39,39,38,38,37,37,36,36,35,35,34,34,34,33,33,33,32,32,32,32,31,31,31,31,31,30,30,30,30,29,29,29,29,29,29,28,28,28,28,28,27,27,27,27,27,26,26,26,26,25,25,24,24,23,23,23,23,23,22,22,22,22,21,21,21,21,21,21,20,20,20,19,19,19,19,19,19,18,18,18,17,17,17,17,16,15,15,15,15,15,15,14,14,14,14,14,14,14,14,14,14,13,13,13,13,13,13,13,13,13,12,12,12,12,12,12,11,11,11,11,11,11,11,10,10,10,10,10,10,10,10,9,9,9,8,8,8,7,7,7,7,6,6],\"type\":\"bar\"}], {\"title\":{\"text\":\"Subject coverage OpenDOAR\"},\"xaxis\":{\"tickangle\":45,\"tickfont\":{\"size\":12}},\"template\":{\"data\":{\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"choropleth\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"choropleth
" \n",
2022-03-17 10:33:11 +01:00
"var gd = document.getElementById('a053a269-00b7-49be-bb2a-ace3fb5c6678');\n",
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count the non-null `eprintid` values per subject code in ROAR, most frequent first.\n",
"data = roar_subjects.groupby('subjects')[['eprintid']].count().sort_values('eprintid', ascending=False)\n",
"plot = [\n",
"    go.Bar(\n",
"        x=data.index,\n",
"        y=data['eprintid'],\n",
"    )\n",
"]\n",
"\n",
"# The data comes from `roar_subjects`, so the figure is labelled ROAR\n",
"# (the title previously read 'OpenDOAR', a copy-paste leftover).\n",
"layout = go.Layout(\n",
"    title='Subject coverage ROAR',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"# `Figure.show()` returns None; keep the figure object in `fig` and render it separately.\n",
"fig = go.Figure(plot, layout)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**FAIRsharing**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Expand the list-valued subjects column so that every (record, subject) pair gets its own row.\n",
"fairsharing_subjects = fairsharing_df.explode(column='attributes.subjects')"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"name": "FAIRsharing",
"type": "bar",
"x": [
"Life Science",
"Biomedical Science",
"Earth Science",
"Genomics",
"Environmental Science",
"Oceanography",
2022-03-17 10:33:11 +01:00
"Biodiversity",
"Epidemiology",
"Biology",
"Atmospheric Science",
"Genetics",
"Health Science",
2022-03-17 10:33:11 +01:00
"Subject Agnostic",
"Bioinformatics",
2022-03-17 10:33:11 +01:00
"Proteomics",
"Virology",
"Agriculture",
"Geology",
"Transcriptomics",
2022-03-17 10:33:11 +01:00
"Comparative Genomics",
"Clinical Studies",
"Chemistry",
2022-03-17 10:33:11 +01:00
"Preclinical Studies",
"Data Management",
2022-03-17 10:33:11 +01:00
"Botany",
"Medicine",
"Functional Genomics",
"Geophysics",
2022-03-17 10:33:11 +01:00
"Meteorology",
"Systems Biology",
"Social Science",
"Ecology",
"Metabolomics",
2022-03-17 10:33:11 +01:00
"Geography",
"Marine Biology",
"Physics",
2022-03-17 10:33:11 +01:00
"Economics",
"Taxonomy",
"Biochemistry",
"Microbiology",
"Ontology and Terminology",
"Astrophysics and Astronomy",
2022-03-17 10:33:11 +01:00
"Humanities and Social Science",
"Hydrology",
"Structural Biology",
"Molecular biology",
"Neurobiology",
2022-03-17 10:33:11 +01:00
"Computational Biology",
"Natural Science",
"Infectious Disease Medicine",
"Phylogenetics",
2022-03-17 10:33:11 +01:00
"Medical Virology",
"Developmental Biology",
2022-03-17 10:33:11 +01:00
"Social and Behavioural Science",
"Computer Science",
"Anatomy",
2022-03-17 10:33:11 +01:00
"Humanities",
"Epigenetics",
2022-03-17 10:33:11 +01:00
"Metagenomics",
"Geochemistry",
"Immunology",
"Remote Sensing",
"Knowledge and Information Systems",
"Neuroscience",
2022-03-17 10:33:11 +01:00
"Plant Genetics",
"Public Health",
"Demographics",
2022-03-17 10:33:11 +01:00
"Critical Care Medicine",
"Ecosystem Science",
"Cell Biology",
"Soil Science",
"Data Visualization",
"Data Integration",
"Oncology",
2022-03-17 10:33:11 +01:00
"Energy Engineering",
"Materials Science",
2022-03-17 10:33:11 +01:00
"Forest Management",
"Hydrogeology",
2022-03-17 10:33:11 +01:00
"Drug Discovery",
"Water Management",
"Glycomics",
"Plant Breeding",
"Water Research",
2022-03-17 10:33:11 +01:00
"Phylogeny",
"Geodesy",
2022-03-17 10:33:11 +01:00
"Paleontology",
"Phylogenomics",
"Engineering Science",
"Population Genetics",
"Human Genetics",
2022-03-17 10:33:11 +01:00
"Immunogenetics",
"Software Engineering",
"Mineralogy",
"Pharmacology",
"Computational Neuroscience",
2022-03-17 10:33:11 +01:00
"Medical Informatics",
"Freshwater Science",
"Global Health",
2022-03-17 10:33:11 +01:00
"Cheminformatics",
"Evolutionary Biology",
"Epigenomics",
"Statistics",
2022-03-17 10:33:11 +01:00
"Animal Genetics",
"Translational Medicine",
"Phenomics",
2022-03-17 10:33:11 +01:00
"Political Science",
"Omics",
"Zoology",
2022-03-17 10:33:11 +01:00
"Culture",
"Psychology",
"Mathematics",
"Drug Development",
2022-03-17 10:33:11 +01:00
"Nanotechnology",
"Education Science",
2022-03-17 10:33:11 +01:00
"Physical Geography",
"Nutritional Science",
"Population Dynamics",
"Natural History",
"Microbial Ecology",
"Enzymology",
"Neurophysiology",
2022-03-17 10:33:11 +01:00
"Geoinformatics",
"Food Security",
"Pathology",
"Analytical Chemistry",
"Archaeology",
"Data Governance",
"Organic Chemistry",
2022-03-17 10:33:11 +01:00
"Bioengineering",
"History",
2022-03-17 10:33:11 +01:00
"Biotechnology",
"Database Management",
"Computational Chemistry",
"Animal Husbandry",
"Toxicology",
"Anthropology",
"Architecture",
"Plant Anatomy",
"Cartography",
"Art",
2022-03-17 10:33:11 +01:00
"Data Quality",
"Informatics",
"Thermodynamics",
"Materials Engineering",
2022-03-17 10:33:11 +01:00
"Maritime Engineering",
"Linguistics",
"Urban Planning",
"Civil Engineering",
"Agronomy",
"Fine Arts",
2022-03-17 10:33:11 +01:00
"Synthetic Biology",
"Toxicogenomics",
"Cardiology",
"Bathymetry",
"Aerospace Engineering",
2022-03-17 10:33:11 +01:00
"Art History",
"Chemical Engineering",
"Business Administration",
2022-03-17 10:33:11 +01:00
"Primary Health Care",
"Pharmacogenomics",
"Power Engineering",
"Economic and Social History",
"Molecular Chemistry",
"Endocrinology",
"Fisheries Science",
2022-03-17 10:33:11 +01:00
"Neurology",
"Communication Science",
"Criminology",
"Tropical Medicine",
"Geriatric Medicine",
"Respiratory Medicine",
2022-03-17 10:33:11 +01:00
"Transportation Planning",
"Health Services Research",
"Physiology",
"Medicinal Chemistry",
2022-03-17 10:33:11 +01:00
"Molecular Genetics",
"Agricultural Engineering",
"Molecular Microbiology",
"Aquaculture",
"Agroecology",
2022-03-17 10:33:11 +01:00
"Pediatrics",
"Occupational Medicine",
"Applied Microbiology",
"Microbial Genetics",
"Hydrography",
"Materials Informatics",
"Geotechnics",
2022-03-17 10:33:11 +01:00
"Embryology",
"Proteogenomics",
"Entomology",
"Drug Repositioning",
"Drug Metabolism",
2022-03-17 10:33:11 +01:00
"Farming Systems Research",
"Developmental Neurobiology",
"Data Security",
"Data Mining",
"Plant Ecology",
"Pharmacy",
"Construction Engineering",
2022-03-17 10:33:11 +01:00
"Reproductive Health",
"Prehistory",
"Human Biology",
2022-03-17 10:33:11 +01:00
"Systemic Neuroscience",
"Inorganic Molecular Chemistry",
"Physical Chemistry",
"Electromagnetism",
"Community Care",
"Veterinary Medicine",
"Public Law",
"Social Psychology",
"Rural and Agricultural Sociology",
"Neurogenetics",
"Public Finance",
"Structural Genomics",
2022-03-17 10:33:11 +01:00
"Quantitative Genetics",
"Synthetic Chemistry",
"Social Policy",
"Philosophy",
"Molecular Physical Chemistry",
"Research on Teaching, Learning and Training",
"Industrial Engineering",
2022-03-17 10:33:11 +01:00
"Molecular Neuroscience",
"Biomaterials",
2022-03-17 10:33:11 +01:00
"Jurisprudence",
"Building Design",
2022-03-17 10:33:11 +01:00
"Human Geography",
"Building Engineering Physics",
"Historical Linguistics",
"Cellular Neuroscience",
"Chemical Biology",
"Classical Philology",
2022-03-17 10:33:11 +01:00
"Gastroenterology",
"Functional Materials Research",
"Cognitive Neuroscience",
"Comparative Neurobiology",
"Electrical Engineering",
"Composite Materials",
2022-03-17 10:33:11 +01:00
"Cosmology",
"Digital Image Processing",
2022-03-17 10:33:11 +01:00
"Criminal Law",
"Limnology",
"Cultural Studies",
"Applied Mathematics",
"Media Studies",
"Agricultural Economics",
"Molecular Infection Biology",
2022-03-17 10:33:11 +01:00
"Animal Physiology",
"Molecular Dynamics",
"Medicines Research and Development",
"Ancient Cultures",
2022-03-17 10:33:11 +01:00
"Medical Physics",
"Ancient History",
"Clinical Psychology",
"Clinical Veterinary Medicine",
"Animal Breeding",
"Theoretical Chemistry",
"Social Medicine",
2022-03-17 10:33:11 +01:00
"Selenography",
"Applied Linguistics",
"Component Engineering",
"Rheumatology",
"Computer Architecture",
"Agricultural Law",
2022-03-17 10:33:11 +01:00
"Religious Studies",
"Safety Science",
"Solid-State Chemistry",
"Clinical Chemistry",
"Biological Process Engineering",
"Telecommunication Engineering",
"Technical Chemistry",
2022-03-17 10:33:11 +01:00
"Biological Psychology",
"Thermal Technology",
"Biomimetic Chemistry",
"Biophysics",
"Biotherapeutics",
"Synthesis Chemistry",
2022-03-17 10:33:11 +01:00
"Theology",
"Regenerative Medicine",
"Behavioural Biology",
"Surface Science",
"Atomic, Molecular, Optical and Plasma Physics",
"Structural Engineering",
2022-03-17 10:33:11 +01:00
"Traditional Medicine",
"Artificial Intelligence",
"Surgery",
"Economic Theory",
"Radiology",
"Ophthalmology",
"Logistics Engineering",
"Materials Structuring and Functionalisation",
2022-03-17 10:33:11 +01:00
"Personalized Medicine",
"Mechanical Behaviour of Construction Materials",
2022-03-17 10:33:11 +01:00
"Mechanical Engineering",
"Particles, Nuclei and Fields",
"Parasitology",
"Mechanical Process Engineering",
"Organic Molecular Chemistry",
"Mechanics",
2022-03-17 10:33:11 +01:00
"Medical Microbiology",
"Dermatology",
"Metal-Cutting Manufacturing Engineering",
"Obstetrics",
"Metaproteomics",
"Nuclear Medicine",
2022-03-17 10:33:11 +01:00
"Metatranscriptomics",
"Microbial Physiology",
"Microstructural Mechanical Properties of Materials",
"Modern History",
"Molecular Medicine",
"Molecular Neurology",
"Literary Studies",
"Landscape Planning",
"Photogrammetry",
"Hydraulic Engineering",
"Developmental Psychology",
"Econometrics",
"Economic Policy",
"Musculoskeletal Medicine",
"Process Engineering",
"Egyptology",
"Electrophysiology",
"Empirical Social Research",
"Food Process Engineering",
"Forensic Medicine",
"Polymer Chemistry",
"Policy",
"Plastics Engineering",
2022-03-17 10:33:11 +01:00
"Gemology",
"Plant Cultivation",
"Plant Cell Biology",
2022-03-17 10:33:11 +01:00
"Gynecology",
"Hematology",
"History of Science",
2022-03-17 10:33:11 +01:00
"Horticulture",
"Human-Machine Systems Engineering",
"Acoustics"
],
"y": [
2022-03-17 10:33:11 +01:00
863,
256,
236,
192,
139,
98,
95,
95,
91,
79,
78,
78,
74,
2022-03-17 10:33:11 +01:00
73,
73,
71,
61,
2022-03-17 10:33:11 +01:00
53,
53,
50,
50,
49,
49,
46,
45,
45,
2022-03-17 10:33:11 +01:00
44,
44,
42,
42,
40,
2022-03-17 10:33:11 +01:00
37,
34,
33,
29,
28,
2022-03-17 10:33:11 +01:00
28,
28,
27,
27,
26,
2022-03-17 10:33:11 +01:00
26,
26,
25,
2022-03-17 10:33:11 +01:00
24,
24,
23,
22,
2022-03-17 10:33:11 +01:00
22,
22,
22,
21,
20,
20,
2022-03-17 10:33:11 +01:00
20,
20,
19,
19,
19,
19,
19,
19,
18,
18,
18,
17,
17,
17,
17,
2022-03-17 10:33:11 +01:00
17,
16,
16,
16,
15,
15,
15,
15,
14,
14,
14,
14,
14,
14,
13,
13,
13,
13,
2022-03-17 10:33:11 +01:00
13,
13,
13,
12,
11,
11,
11,
11,
10,
10,
9,
9,
9,
9,
9,
9,
2022-03-17 10:33:11 +01:00
9,
8,
8,
8,
8,
8,
8,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
7,
6,
6,
6,
6,
6,
6,
6,
6,
6,
6,
6,
2022-03-17 10:33:11 +01:00
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
5,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
4,
2022-03-17 10:33:11 +01:00
4,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
3,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2022-03-17 10:33:11 +01:00
2,
2021-07-22 11:03:05 +02:00
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
2022-02-14 13:34:42 +01:00
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Subject coverage FAIRsharing"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2022-03-17 10:33:11 +01:00
"<div> <div id=\"457d448f-a9bc-4c08-862b-b9cec750a8c1\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"457d448f-a9bc-4c08-862b-b9cec750a8c1\")) { Plotly.newPlot( \"457d448f-a9bc-4c08-862b-b9cec750a8c1\", [{\"name\":\"FAIRsharing\",\"x\":[\"Life Science\",\"Biomedical Science\",\"Earth Science\",\"Genomics\",\"Environmental Science\",\"Oceanography\",\"Biodiversity\",\"Epidemiology\",\"Biology\",\"Atmospheric Science\",\"Genetics\",\"Health Science\",\"Subject Agnostic\",\"Bioinformatics\",\"Proteomics\",\"Virology\",\"Agriculture\",\"Geology\",\"Transcriptomics\",\"Comparative Genomics\",\"Clinical Studies\",\"Chemistry\",\"Preclinical Studies\",\"Data Management\",\"Botany\",\"Medicine\",\"Functional Genomics\",\"Geophysics\",\"Meteorology\",\"Systems Biology\",\"Social Science\",\"Ecology\",\"Metabolomics\",\"Geography\",\"Marine Biology\",\"Physics\",\"Economics\",\"Taxonomy\",\"Biochemistry\",\"Microbiology\",\"Ontology and Terminology\",\"Astrophysics and Astronomy\",\"Humanities and Social Science\",\"Hydrology\",\"Structural Biology\",\"Molecular biology\",\"Neurobiology\",\"Computational Biology\",\"Natural Science\",\"Infectious Disease Medicine\",\"Phylogenetics\",\"Medical Virology\",\"Developmental Biology\",\"Social and Behavioural Science\",\"Computer Science\",\"Anatomy\",\"Humanities\",\"Epigenetics\",\"Metagenomics\",\"Geochemistry\",\"Immunology\",\"Remote Sensing\",\"Knowledge and Information Systems\",\"Neuroscience\",\"Plant Genetics\",\"Public Health\",\"Demographics\",\"Critical Care Medicine\",\"Ecosystem Science\",\"Cell Biology\",\"Soil Science\",\"Data Visualization\",\"Data Integration\",\"Oncology\",\"Energy Engineering\",\"Materials Science\",\"Forest Management\",\"Hydrogeology\",\"Drug Discovery\",\"Water Management\",\"Glycomics\",\"Plant Breeding\",\"Water 
Research\",\"Phylogeny\",\"Geodesy\",\"Paleontology\",\"Phylogenomics\",\"Engineering Science\",\"Population Genetics\",\"Human Genetics\",\"Immunogenetics\",\"Software Engineering\",\"Mineralogy\",\"Pharmacology\",\"Computational Neuroscience\",\"Medical Informatics\",\"Freshwater Science\",\"Global Health\",\"Cheminformatics\",\"Evolutionary Biology\",\"Epigenomics\",\"Statistics\",\"Animal Genetics\",\"Translational Medicine\",\"Phenomics\",\"Political Science\",\"Omics\",\"Zoology\",\"Culture\",\"Psychology\",\"Mathematics\",\"Drug Development\",\"Nanotechnology\",\"Education Science\",\"Physical Geography\",\"Nutritional Science\",\"Population Dynamics\",\"Natural History\",\"Microbial Ecology\",\"Enzymology\",\"Neurophysiology\",\"Geoinformatics\",\"Food Security\",\"Pathology\",\"Analytical Chemistry\",\"Archaeology\",\"Data Governance\",\"Organic Chemistry\",\"Bioengineering\",\"History\",\"Biotechnology\",\"Database Management\",\"Computational Chemistry\",\"Animal Husbandry\",\"Toxicology\",\"Anthropology\",\"Architecture\",\"Plant Anatomy\",\"Cartography\",\"Art\",\"Data Quality\",\"Informatics\",\"Thermodynamics\",\"Materials Engineering\",\"Maritime Engineering\",\"Linguistics\",\"Urban Planning\",\"Civil Engineering\",\"Agronomy\",\"Fine Arts\",\"Synthetic Biology\",\"Toxicogenomics\",\"Cardiology\",\"Bathymetry\",\"Aerospace Engineering\",\"Art History\",\"Chemical Engineering\",\"Business Administration\",\"Primary Health Care\",\"Pharmacogenomics\",\"Power Engineering\",\"Economic and Social History\",\"Molecular Chemistry\",\"Endocrinology\",\"Fisheries Science\",\"Neurology\",\"Communication Science\",\"Criminology\",\"Tropical Medicine\",\"Geriatric Medicine\",\"Respiratory Medicine\",\"Transportation Planning\",\"Health Services Research\",\"Physiology\",\"Medicinal Chemistry\",\"Molecular Genetics\",\"Agricultural Engineering\",\"Molecular Microbiology\",\"Aquaculture\",\
2021-07-22 11:03:05 +02:00
" \n",
2022-03-17 10:33:11 +01:00
"var gd = document.getElementById('457d448f-a9bc-4c08-862b-b9cec750a8c1');\n",
2021-07-22 11:03:05 +02:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Count the non-null `id` values per FAIRsharing subject, most frequent first.\n",
"data = fairsharing_subjects.groupby('attributes.subjects')[['id']].count().sort_values('id', ascending=False)\n",
"plot = [\n",
"    go.Bar(\n",
"        x=data.index,\n",
"        y=data['id'],\n",
"        name='FAIRsharing'\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Subject coverage FAIRsharing',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"# `Figure.show()` returns None; keep the figure object in `fig` and render it separately.\n",
"fig = go.Figure(plot, layout)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Geographic analysis"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**re3data**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 19,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-10-08 14:28:56 +02:00
" <th>orgIdentifier</th>\n",
2021-07-22 11:03:05 +02:00
" <th>institution</th>\n",
2021-10-08 14:28:56 +02:00
" <th>institutionName</th>\n",
" <th>institutionAdditionalName</th>\n",
" <th>institutionCountry</th>\n",
" <th>responsabilityType</th>\n",
" <th>institutionType</th>\n",
" <th>institutionURL</th>\n",
" <th>institutionIdentifier</th>\n",
" <th>responsibilityStartDate</th>\n",
" <th>responsibilityEndDate</th>\n",
" <th>institutionContact</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
2021-10-08 14:28:56 +02:00
" <td>{'institutionName': 'Odum Institute for Resear...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>Odum Institute for Research in Social Science</td>\n",
" <td>[]</td>\n",
" <td>USA</td>\n",
" <td>[general]</td>\n",
" <td>non-profit</td>\n",
" <td>https://odum.unc.edu/archive/</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>[]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>r3d100000002</td>\n",
2021-10-08 14:28:56 +02:00
" <td>{'institutionName': 'The U.S. National Archive...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>The U.S. National Archives and Records Adminis...</td>\n",
" <td>[NARA, National Archives]</td>\n",
" <td>USA</td>\n",
" <td>[general]</td>\n",
" <td>non-profit</td>\n",
" <td>http://www.archives.gov/</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>[http://www.archives.gov/contact/]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>r3d100000002</td>\n",
2021-10-08 14:28:56 +02:00
" <td>{'institutionName': 'The USA.gov', 'institutio...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>The USA.gov</td>\n",
" <td>[]</td>\n",
" <td>USA</td>\n",
" <td>[general]</td>\n",
" <td>non-profit</td>\n",
" <td>http://www.usa.gov/</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>[http://www.usa.gov/Contact.shtml]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>r3d100000004</td>\n",
2021-10-08 14:28:56 +02:00
" <td>{'institutionName': 'Institut für Deutsche Spr...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>Institut für Deutsche Sprache, Archiv für Gesp...</td>\n",
" <td>[AGD]</td>\n",
" <td>DEU</td>\n",
" <td>[funding, general]</td>\n",
" <td>non-profit</td>\n",
" <td>http://agd.ids-mannheim.de/index.shtml</td>\n",
" <td>[]</td>\n",
" <td>2004</td>\n",
" <td></td>\n",
" <td>[agd@ids-mannheim.de]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>r3d100000005</td>\n",
2021-10-08 14:28:56 +02:00
" <td>{'institutionName': 'Odum Institute for Resear...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>Odum Institute for Research in Social Science</td>\n",
" <td>[]</td>\n",
" <td>USA</td>\n",
" <td>[technical]</td>\n",
" <td>non-profit</td>\n",
" <td>https://odum.unc.edu/</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>[https://odum.unc.edu/contact/contact-form/, o...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-10-08 14:28:56 +02:00
" orgIdentifier institution \\\n",
"0 r3d100000001 {'institutionName': 'Odum Institute for Resear... \n",
"1 r3d100000002 {'institutionName': 'The U.S. National Archive... \n",
"2 r3d100000002 {'institutionName': 'The USA.gov', 'institutio... \n",
"3 r3d100000004 {'institutionName': 'Institut für Deutsche Spr... \n",
"4 r3d100000005 {'institutionName': 'Odum Institute for Resear... \n",
"\n",
" institutionName \\\n",
2021-07-22 11:03:05 +02:00
"0 Odum Institute for Research in Social Science \n",
"1 The U.S. National Archives and Records Adminis... \n",
"2 The USA.gov \n",
"3 Institut für Deutsche Sprache, Archiv für Gesp... \n",
"4 Odum Institute for Research in Social Science \n",
"\n",
2021-10-08 14:28:56 +02:00
" institutionAdditionalName institutionCountry responsabilityType \\\n",
"0 [] USA [general] \n",
"1 [NARA, National Archives] USA [general] \n",
"2 [] USA [general] \n",
"3 [AGD] DEU [funding, general] \n",
"4 [] USA [technical] \n",
"\n",
" institutionType institutionURL \\\n",
"0 non-profit https://odum.unc.edu/archive/ \n",
"1 non-profit http://www.archives.gov/ \n",
"2 non-profit http://www.usa.gov/ \n",
"3 non-profit http://agd.ids-mannheim.de/index.shtml \n",
"4 non-profit https://odum.unc.edu/ \n",
"\n",
" institutionIdentifier responsibilityStartDate responsibilityEndDate \\\n",
"0 [] \n",
"1 [] \n",
"2 [] \n",
"3 [] 2004 \n",
"4 [] \n",
"\n",
" institutionContact \n",
2021-07-22 11:03:05 +02:00
"0 [] \n",
"1 [http://www.archives.gov/contact/] \n",
"2 [http://www.usa.gov/Contact.shtml] \n",
"3 [agd@ids-mannheim.de] \n",
"4 [https://odum.unc.edu/contact/contact-form/, o... "
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 19,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"# One row per (orgIdentifier, institution) pair; rows whose institution is missing are dropped.\n",
"re3data_institutions = (\n",
"    re3data_df[['orgIdentifier', 'institution']]\n",
"    .explode('institution')\n",
"    .dropna(subset=['institution'])\n",
"    .reset_index(drop=True)\n",
")\n",
"# Flatten each institution record into its own columns. The index was reset above,\n",
"# so the positional index produced by json_normalize lines up with it in the join.\n",
"re3data_institutions = re3data_institutions.join(pd.json_normalize(re3data_institutions['institution']))\n",
"re3data_institutions.head()"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 20,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-10-08 14:28:56 +02:00
"# Derive a continent code from each institution's country code (NaN where the lookup fails).\n",
"re3data_institutions['org_continent'] = re3data_institutions['institutionCountry'].map(countrycode_to_continent)"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 21,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['AAA', 'EEC'], dtype=object)"
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 21,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"re3data_institutions[re3data_institutions.org_continent.isna()].institutionCountry.unique()"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"- `AAA` is used for international collaborations; we leave its continent unset.\n",
"- `EEC` is used for the European Commission; we set its continent to `EU` manually."
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 22,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-10-08 14:28:56 +02:00
"re3data_institutions.loc[re3data_institutions.institutionCountry == 'EEC', 'org_continent'] = 'EU'"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**OpenDOAR**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 23,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-10-08 14:28:56 +02:00
" <th>system_metadata.id</th>\n",
" <th>organization</th>\n",
" <th>name</th>\n",
" <th>alternativeName</th>\n",
" <th>country</th>\n",
" <th>url</th>\n",
" <th>identifier</th>\n",
" <th>location.latitude</th>\n",
" <th>location.longiture</th>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2022-02-14 13:34:42 +01:00
" <td>134</td>\n",
" <td>{'name': 'technische universität dortmund', 'a...</td>\n",
" <td>technische universität dortmund</td>\n",
" <td>tu dortmund</td>\n",
" <td>DEU</td>\n",
" <td>https://www.tu-dortmund.de</td>\n",
" <td>[{'identifier': 'https://ror.org/01k97gp34', '...</td>\n",
" <td></td>\n",
" <td></td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2022-02-14 13:34:42 +01:00
" <td>58</td>\n",
" <td>{'name': 'centre pour la communication scienti...</td>\n",
" <td>centre pour la communication scientifique directe</td>\n",
" <td>ccsd</td>\n",
" <td>FRA</td>\n",
" <td>https://www.ccsd.cnrs.fr</td>\n",
" <td>[]</td>\n",
" <td></td>\n",
" <td></td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2022-02-14 13:34:42 +01:00
" <td>93</td>\n",
" <td>{'name': 'texas medical center', 'alternativeN...</td>\n",
" <td>texas medical center</td>\n",
" <td>tmc</td>\n",
" <td>USA</td>\n",
" <td>https://www.tmc.edu</td>\n",
" <td>[{'identifier': 'https://ror.org/00dqsbj20', '...</td>\n",
" <td></td>\n",
" <td></td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2022-02-14 13:34:42 +01:00
" <td>68</td>\n",
" <td>{'name': 'university of southampton', 'alterna...</td>\n",
" <td>university of southampton</td>\n",
" <td></td>\n",
" <td>GBR</td>\n",
" <td>https://www.southampton.ac.uk/</td>\n",
" <td>[{'identifier': 'https://ror.org/01ryk1543', '...</td>\n",
" <td></td>\n",
" <td></td>\n",
2021-07-22 11:03:05 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2022-02-14 13:34:42 +01:00
" <td>84</td>\n",
" <td>{'name': 'carleton college', 'alternativeName'...</td>\n",
" <td>carleton college</td>\n",
" <td></td>\n",
" <td>USA</td>\n",
" <td>https://www.carleton.edu</td>\n",
" <td>[{'identifier': 'https://ror.org/03jep7677', '...</td>\n",
" <td></td>\n",
2021-07-22 11:03:05 +02:00
" <td></td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-10-08 14:28:56 +02:00
" system_metadata.id organization \\\n",
2022-02-14 13:34:42 +01:00
"0 134 {'name': 'technische universität dortmund', 'a... \n",
"1 58 {'name': 'centre pour la communication scienti... \n",
"2 93 {'name': 'texas medical center', 'alternativeN... \n",
"3 68 {'name': 'university of southampton', 'alterna... \n",
"4 84 {'name': 'carleton college', 'alternativeName'... \n",
"\n",
" name alternativeName country \\\n",
"0 technische universität dortmund tu dortmund DEU \n",
"1 centre pour la communication scientifique directe ccsd FRA \n",
"2 texas medical center tmc USA \n",
"3 university of southampton GBR \n",
"4 carleton college USA \n",
"\n",
" url \\\n",
"0 https://www.tu-dortmund.de \n",
"1 https://www.ccsd.cnrs.fr \n",
"2 https://www.tmc.edu \n",
"3 https://www.southampton.ac.uk/ \n",
"4 https://www.carleton.edu \n",
"\n",
" identifier location.latitude \\\n",
"0 [{'identifier': 'https://ror.org/01k97gp34', '... \n",
"1 [] \n",
"2 [{'identifier': 'https://ror.org/00dqsbj20', '... \n",
"3 [{'identifier': 'https://ror.org/01ryk1543', '... \n",
"4 [{'identifier': 'https://ror.org/03jep7677', '... \n",
"\n",
" location.longiture \n",
"0 \n",
"1 \n",
"2 \n",
"3 \n",
"4 "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 23,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"opendoar_institutions = opendoar_df.explode('organization')[['system_metadata.id', 'organization']]\n",
"opendoar_institutions = opendoar_institutions[~opendoar_institutions.organization.isna()].reset_index(drop=True)\n",
"opendoar_institutions = opendoar_institutions.join(pd.json_normalize(opendoar_institutions.organization))\n",
"opendoar_institutions['country'] = opendoar_institutions.country.map(str.upper, na_action='ignore')\n",
"opendoar_institutions['country'] = opendoar_institutions.country.map(countrycode_iso2_to_countrycode_iso3, na_action='ignore')\n",
2021-07-22 11:03:05 +02:00
"opendoar_institutions.head()"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 24,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-10-08 14:28:56 +02:00
"opendoar_institutions['org_continent'] = opendoar_institutions.country.map(countrycode_to_continent)"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 25,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'UMI'], dtype=object)"
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 25,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"opendoar_institutions[opendoar_institutions.org_continent.isna()].country.unique()"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 26,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-10-08 14:28:56 +02:00
" <th>system_metadata.id</th>\n",
" <th>organization</th>\n",
" <th>name</th>\n",
" <th>alternativeName</th>\n",
" <th>country</th>\n",
" <th>url</th>\n",
" <th>identifier</th>\n",
" <th>location.latitude</th>\n",
" <th>location.longiture</th>\n",
2021-07-22 11:03:05 +02:00
" <th>org_continent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>4233</th>\n",
2021-07-22 11:03:05 +02:00
" <td>5379</td>\n",
2021-10-08 14:28:56 +02:00
" <td>{'name': 'kettering university', 'alternativeN...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>kettering university</td>\n",
" <td></td>\n",
2021-10-08 14:28:56 +02:00
" <td>UMI</td>\n",
2021-07-22 11:03:05 +02:00
" <td>https://www.kettering.edu</td>\n",
2021-10-08 14:28:56 +02:00
" <td>[{'identifier': 'https://ror.org/03rcspa57', '...</td>\n",
2022-02-14 13:34:42 +01:00
" <td></td>\n",
" <td></td>\n",
2021-07-22 11:03:05 +02:00
" <td>NA</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-10-08 14:28:56 +02:00
" system_metadata.id organization \\\n",
2022-02-14 13:34:42 +01:00
"4233 5379 {'name': 'kettering university', 'alternativeN... \n",
2021-07-22 11:03:05 +02:00
"\n",
2021-10-08 14:28:56 +02:00
" name alternativeName country url \\\n",
2022-02-14 13:34:42 +01:00
"4233 kettering university UMI https://www.kettering.edu \n",
2021-07-22 11:03:05 +02:00
"\n",
2021-10-08 14:28:56 +02:00
" identifier location.latitude \\\n",
2022-02-14 13:34:42 +01:00
"4233 [{'identifier': 'https://ror.org/03rcspa57', '... \n",
2021-07-22 11:03:05 +02:00
"\n",
2021-10-08 14:28:56 +02:00
" location.longiture org_continent \n",
2022-02-14 13:34:42 +01:00
"4233 NA "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 26,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"opendoar_institutions.loc[opendoar_institutions.country == 'UMI', 'org_continent'] = 'NA'\n",
"opendoar_institutions[opendoar_institutions.country == 'UMI']"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ROAR**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 27,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-10-08 14:28:56 +02:00
"roar_institutions = roar_df.explode('location_country')\n",
"roar_institutions['location_country'] = roar_institutions.location_country.map(str.upper, na_action='ignore')\n",
"roar_institutions['location_country'] = roar_institutions.location_country.map(countrycode_iso2_to_countrycode_iso3)\n",
"roar_institutions['continent'] = roar_institutions.location_country.map(countrycode_to_continent)"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**FAIRsharing**"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 28,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-10-08 14:28:56 +02:00
"fairsharing_countries = fairsharing_df.explode('attributes.countries')\n",
"fairsharing_countries['countrycode'] = fairsharing_countries['attributes.countries'].map(country_to_countrycode)\n",
2021-07-22 11:03:05 +02:00
"fairsharing_countries['continent'] = fairsharing_countries.countrycode.map(countrycode_to_continent)"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 29,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2022-03-17 10:33:11 +01:00
"array(['European Union', 'Worldwide', nan], dtype=object)"
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 29,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"fairsharing_countries[fairsharing_countries.countrycode.isna()]['attributes.countries'].unique()"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 30,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2022-03-17 10:33:11 +01:00
"array(['European Union', 'Worldwide', nan, 'Antarctica'], dtype=object)"
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 30,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"fairsharing_countries[fairsharing_countries.continent.isna()]['attributes.countries'].unique()"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Manually fix some rows"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 31,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [],
"source": [
2021-10-08 14:28:56 +02:00
"fairsharing_countries.loc[fairsharing_countries['attributes.countries'] == 'Republic of Ireland', ['attributes.countries', 'countrycode', 'continent']] = ['Ireland', 'IE', 'EU']\n",
"fairsharing_countries.loc[fairsharing_countries['attributes.countries'] == 'European Union', ['countrycode', 'continent']] = ['EU', 'EU']"
2021-07-22 11:03:05 +02:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Make Antarctica disappear (only a couple of repos)"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 32,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-10-08 14:28:56 +02:00
" <th>id</th>\n",
" <th>type</th>\n",
" <th>attributes.created-at</th>\n",
" <th>attributes.updated-at</th>\n",
" <th>attributes.metadata.doi</th>\n",
" <th>attributes.metadata.name</th>\n",
" <th>attributes.metadata.status</th>\n",
" <th>attributes.metadata.contacts</th>\n",
" <th>attributes.metadata.homepage</th>\n",
" <th>attributes.metadata.identifier</th>\n",
" <th>attributes.metadata.description</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.abbreviation</th>\n",
2021-10-08 14:28:56 +02:00
" <th>attributes.metadata.support-links</th>\n",
" <th>attributes.metadata.year-creation</th>\n",
" <th>attributes.metadata.data-processes</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.cross-references</th>\n",
2021-10-08 14:28:56 +02:00
" <th>attributes.legacy-ids</th>\n",
" <th>attributes.fairsharing-registry</th>\n",
" <th>attributes.record-type</th>\n",
" <th>attributes.subjects</th>\n",
" <th>attributes.domains</th>\n",
" <th>attributes.taxonomies</th>\n",
" <th>attributes.user-defined-tags</th>\n",
" <th>attributes.countries</th>\n",
" <th>attributes.name</th>\n",
" <th>attributes.abbreviation</th>\n",
" <th>attributes.url</th>\n",
" <th>attributes.doi</th>\n",
" <th>attributes.fairsharing-licence</th>\n",
" <th>attributes.description</th>\n",
" <th>attributes.publications</th>\n",
" <th>attributes.licence-links</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.url-for-logo</th>\n",
2021-10-08 14:28:56 +02:00
" <th>attributes.metadata.citations</th>\n",
" <th>attributes.metadata.associated-tools</th>\n",
" <th>attributes.metadata.deprecation-reason</th>\n",
2022-03-17 10:33:11 +01:00
" <th>attributes.metadata.data-access-condition.type</th>\n",
" <th>attributes.metadata.data-contact-information</th>\n",
" <th>attributes.metadata.data-deposition-condition.url</th>\n",
" <th>attributes.metadata.data-deposition-condition.type</th>\n",
" <th>attributes.metadata.deprecation-date</th>\n",
" <th>attributes.metadata.access-points</th>\n",
" <th>attributes.metadata.data-access-condition.url</th>\n",
" <th>attributes.metadata.resource-sustainability.url</th>\n",
" <th>attributes.metadata.resource-sustainability.name</th>\n",
" <th>attributes.metadata.data-preservation-policy.url</th>\n",
" <th>attributes.metadata.data-preservation-policy.name</th>\n",
" <th>attributes.metadata.data-access-for-pre-publication-review</th>\n",
" <th>attributes.metadata.data-versioning</th>\n",
" <th>attributes.metadata.data-curation.type</th>\n",
" <th>attributes.metadata.data-curation.url</th>\n",
" <th>attributes.metadata.citation-to-related-publications</th>\n",
2021-10-08 14:28:56 +02:00
" <th>attributes.metadata.tombstone</th>\n",
2021-07-22 11:03:05 +02:00
" <th>countrycode</th>\n",
" <th>continent</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2022-03-17 10:33:11 +01:00
" <th>325</th>\n",
2021-10-08 14:28:56 +02:00
" <td>2462</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2017-06-27T13:30:19.000Z</td>\n",
2022-03-17 10:33:11 +01:00
" <td>2021-12-02T18:05:26.741Z</td>\n",
2021-10-08 14:28:56 +02:00
" <td>10.25504/FAIRsharing.ewyejx</td>\n",
2021-07-22 11:03:05 +02:00
" <td>Antabif IPT - AntOBIS IPT - GBIF Belgium</td>\n",
2021-10-08 14:28:56 +02:00
" <td>ready</td>\n",
" <td>[{'contact-name': 'Anton Van de Putte', 'conta...</td>\n",
2021-07-22 11:03:05 +02:00
" <td>http://ipt.biodiversity.aq/</td>\n",
2021-10-08 14:28:56 +02:00
" <td>2462</td>\n",
" <td>The Belgium Biodiversity Platform hosts this d...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>NaN</td>\n",
2021-10-08 14:28:56 +02:00
" <td>[{'url': 'a.heughebaert@biodiversity.be', 'nam...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-03-17 10:33:11 +01:00
" <td>NaN</td>\n",
2021-10-08 14:28:56 +02:00
" <td>[biodbcore-000944, bsg-d000944]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
2021-07-22 11:03:05 +02:00
" <td>[Biodiversity, Life Science]</td>\n",
2021-10-08 14:28:56 +02:00
" <td>[Taxonomic classification]</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>Antarctica</td>\n",
" <td>FAIRsharing record for: Antabif IPT - AntOBIS ...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.e...</td>\n",
" <td>10.25504/FAIRsharing.ewyejx</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The Belgium...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'Apache License 2.0', 'licen...</td>\n",
2022-03-17 10:33:11 +01:00
" <td>None</td>\n",
" <td>[]</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>AQ</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1094</th>\n",
" <td>3654</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2021-12-02T09:58:02.958Z</td>\n",
" <td>2021-12-07T14:13:56.118Z</td>\n",
" <td>NaN</td>\n",
" <td>SCAR Antarctic Biodiversity Portal</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'Anton Van de Putte', 'conta...</td>\n",
" <td>https://www.biodiversity.aq/</td>\n",
" <td>3654</td>\n",
" <td>Antarctic marine and terrestrial biodiversity ...</td>\n",
" <td>None</td>\n",
" <td>[{'url': 'https://www.biodiversity.aq/how-to/w...</td>\n",
" <td>2005.0</td>\n",
" <td>[{'url': 'https://www.biodiversity.aq/find-dat...</td>\n",
" <td>[{'url': 'https://www.re3data.org/repository/r...</td>\n",
" <td>[]</td>\n",
" <td>Database</td>\n",
" <td>knowledgebase</td>\n",
" <td>[Zoology, Taxonomy, Ecology, Biodiversity, Oce...</td>\n",
" <td>[]</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>Antarctica</td>\n",
" <td>FAIRsharing record for: SCAR Antarctic Biodive...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/fairsharing_records/3654</td>\n",
" <td>None</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: Antarctic m...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'SCAR Antarctic Biodiversity...</td>\n",
" <td>None</td>\n",
" <td>[]</td>\n",
" <td>[{'url': 'https://www.biodiversity.aq/tools/r-...</td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[{'url': 'https://data.biodiversity.aq/api/v1....</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-10-08 14:28:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:03:05 +02:00
" <td>AQ</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2022-03-17 10:33:11 +01:00
" id type attributes.created-at \\\n",
"325 2462 fairsharing-records 2017-06-27T13:30:19.000Z \n",
"1094 3654 fairsharing-records 2021-12-02T09:58:02.958Z \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.updated-at attributes.metadata.doi \\\n",
"325 2021-12-02T18:05:26.741Z 10.25504/FAIRsharing.ewyejx \n",
"1094 2021-12-07T14:13:56.118Z NaN \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.name attributes.metadata.status \\\n",
"325 Antabif IPT - AntOBIS IPT - GBIF Belgium ready \n",
"1094 SCAR Antarctic Biodiversity Portal ready \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.contacts \\\n",
"325 [{'contact-name': 'Anton Van de Putte', 'conta... \n",
"1094 [{'contact-name': 'Anton Van de Putte', 'conta... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.homepage attributes.metadata.identifier \\\n",
"325 http://ipt.biodiversity.aq/ 2462 \n",
"1094 https://www.biodiversity.aq/ 3654 \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.description \\\n",
"325 The Belgium Biodiversity Platform hosts this d... \n",
"1094 Antarctic marine and terrestrial biodiversity ... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.abbreviation \\\n",
"325 NaN \n",
"1094 None \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.support-links \\\n",
"325 [{'url': 'a.heughebaert@biodiversity.be', 'nam... \n",
"1094 [{'url': 'https://www.biodiversity.aq/how-to/w... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.year-creation \\\n",
"325 NaN \n",
"1094 2005.0 \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.data-processes \\\n",
"325 NaN \n",
"1094 [{'url': 'https://www.biodiversity.aq/find-dat... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.metadata.cross-references \\\n",
"325 NaN \n",
"1094 [{'url': 'https://www.re3data.org/repository/r... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
"325 [biodbcore-000944, bsg-d000944] Database \n",
"1094 [] Database \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.record-type \\\n",
"325 repository \n",
"1094 knowledgebase \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.subjects \\\n",
"325 [Biodiversity, Life Science] \n",
"1094 [Zoology, Taxonomy, Ecology, Biodiversity, Oce... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.domains attributes.taxonomies \\\n",
"325 [Taxonomic classification] [All] \n",
"1094 [] [All] \n",
2021-07-22 11:03:05 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.user-defined-tags attributes.countries \\\n",
"325 [] Antarctica \n",
"1094 [] Antarctica \n",
2021-07-22 11:03:05 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.name \\\n",
"325 FAIRsharing record for: Antabif IPT - AntOBIS ... \n",
"1094 FAIRsharing record for: SCAR Antarctic Biodive... \n",
2021-07-22 11:03:05 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.abbreviation \\\n",
"325 None \n",
"1094 None \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.url \\\n",
"325 https://fairsharing.org/10.25504/FAIRsharing.e... \n",
"1094 https://fairsharing.org/fairsharing_records/3654 \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.doi \\\n",
"325 10.25504/FAIRsharing.ewyejx \n",
"1094 None \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.fairsharing-licence \\\n",
"325 https://creativecommons.org/licenses/by-sa/4.0... \n",
"1094 https://creativecommons.org/licenses/by-sa/4.0... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.description \\\n",
"325 This FAIRsharing record describes: The Belgium... \n",
"1094 This FAIRsharing record describes: Antarctic m... \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.publications \\\n",
"325 [] \n",
"1094 [] \n",
2021-10-08 14:28:56 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.licence-links \\\n",
"325 [{'licence-name': 'Apache License 2.0', 'licen... \n",
"1094 [{'licence-name': 'SCAR Antarctic Biodiversity... \n",
2021-07-22 11:03:05 +02:00
"\n",
2022-03-17 10:33:11 +01:00
" attributes.url-for-logo attributes.metadata.citations \\\n",
"325 None [] \n",
"1094 None [] \n",
"\n",
" attributes.metadata.associated-tools \\\n",
"325 NaN \n",
"1094 [{'url': 'https://www.biodiversity.aq/tools/r-... \n",
"\n",
" attributes.metadata.deprecation-reason \\\n",
"325 None \n",
"1094 \n",
"\n",
" attributes.metadata.data-access-condition.type \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-contact-information \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.url \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-deposition-condition.type \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.deprecation-date \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.access-points \\\n",
"325 NaN \n",
"1094 [{'url': 'https://data.biodiversity.aq/api/v1.... \n",
"\n",
" attributes.metadata.data-access-condition.url \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.resource-sustainability.url \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.resource-sustainability.name \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.url \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-preservation-policy.name \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-access-for-pre-publication-review \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-versioning \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-curation.type \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.data-curation.url \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.citation-to-related-publications \\\n",
"325 NaN \n",
"1094 NaN \n",
"\n",
" attributes.metadata.tombstone countrycode continent \n",
"325 NaN AQ NaN \n",
"1094 NaN AQ NaN "
2021-07-22 11:03:05 +02:00
]
},
2022-03-17 10:33:11 +01:00
"execution_count": 32,
2021-07-22 11:03:05 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"fairsharing_countries.loc[fairsharing_countries['attributes.countries'] == 'Antarctica', ['countrycode', 'continent']] = ['AQ', np.nan]\n",
2021-07-22 11:03:05 +02:00
"fairsharing_countries[fairsharing_countries.countrycode == 'AQ']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Country coverage"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 33,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"name": "re3data",
"type": "bar",
"x": [
"USA",
"DEU",
"CAN",
"GBR",
"EEC",
"AAA",
"FRA",
"AUS",
"CHE",
"JPN",
"NLD",
"ESP",
"IND",
"CHN",
"ITA",
"NOR",
2021-10-08 14:28:56 +02:00
"AUT",
2021-07-22 11:03:05 +02:00
"SWE",
"BEL",
"DNK",
"RUS",
2021-10-08 14:28:56 +02:00
"POL",
2021-07-22 11:03:05 +02:00
"GRC",
2021-10-08 14:28:56 +02:00
"MEX",
2021-07-22 11:03:05 +02:00
"IRL",
2022-02-14 13:34:42 +01:00
"ZAF",
"CZE",
2021-07-22 11:03:05 +02:00
"TWN",
2021-10-08 14:28:56 +02:00
"NZL",
"BRA",
2021-07-22 11:03:05 +02:00
"PRT",
2022-02-14 13:34:42 +01:00
"FIN",
2021-07-22 11:03:05 +02:00
"EST",
"KOR",
"COL",
"SRB",
2022-02-14 13:34:42 +01:00
"ISR",
2021-07-22 11:03:05 +02:00
"LTU",
"SGP",
2022-02-14 13:34:42 +01:00
"ARG",
2021-07-22 11:03:05 +02:00
"HUN",
2021-10-08 14:28:56 +02:00
"TUR",
2022-02-14 13:34:42 +01:00
"SVN",
2021-07-22 11:03:05 +02:00
"ISL",
"KEN",
2021-10-08 14:28:56 +02:00
"HKG",
2022-02-14 13:34:42 +01:00
"UKR",
2021-07-22 11:03:05 +02:00
"ROU",
2022-02-14 13:34:42 +01:00
"IDN",
2021-07-22 11:03:05 +02:00
"SVK",
2021-10-08 14:28:56 +02:00
"LUX",
2022-02-14 13:34:42 +01:00
"PAK",
2021-10-08 14:28:56 +02:00
"PER",
2022-02-14 13:34:42 +01:00
"LVA",
"THA",
"CYP",
2021-07-22 11:03:05 +02:00
"CHL",
2021-10-08 14:28:56 +02:00
"HRV",
2022-02-14 13:34:42 +01:00
"GRL",
"CMR",
2021-10-08 14:28:56 +02:00
"SDN",
2022-02-14 13:34:42 +01:00
"VNM",
"GHA",
"LBN",
"BFA",
2021-07-22 11:03:05 +02:00
"BEN",
"PAN",
2021-10-08 14:28:56 +02:00
"MKD",
2022-02-14 13:34:42 +01:00
"PHL",
"BIH",
"FJI",
"ETH",
2021-07-22 11:03:05 +02:00
"KAZ",
2021-10-08 14:28:56 +02:00
"CIV",
2022-02-14 13:34:42 +01:00
"LAO",
2021-10-08 14:28:56 +02:00
"TUN",
"MWI",
2022-02-14 13:34:42 +01:00
"LKA",
"NAM",
"NCL",
2021-10-08 14:28:56 +02:00
"SEN",
2022-02-14 13:34:42 +01:00
"AZE",
"SAU",
2021-10-08 14:28:56 +02:00
"PYF",
2022-02-14 13:34:42 +01:00
"EGY"
2021-07-22 11:03:05 +02:00
],
"y": [
2022-02-14 13:34:42 +01:00
2993,
1154,
601,
577,
404,
2021-07-22 11:03:05 +02:00
349,
2022-02-14 13:34:42 +01:00
279,
240,
134,
133,
131,
100,
2021-10-08 14:28:56 +02:00
87,
76,
2022-02-14 13:34:42 +01:00
66,
62,
2021-10-08 14:28:56 +02:00
61,
60,
2022-02-14 13:34:42 +01:00
46,
40,
2021-10-08 14:28:56 +02:00
36,
2022-02-14 13:34:42 +01:00
34,
2021-07-22 11:03:05 +02:00
31,
2021-10-08 14:28:56 +02:00
24,
2022-02-14 13:34:42 +01:00
22,
22,
22,
19,
19,
2021-10-08 14:28:56 +02:00
19,
18,
2021-07-22 11:03:05 +02:00
18,
14,
14,
13,
2021-10-08 14:28:56 +02:00
13,
2022-02-14 13:34:42 +01:00
13,
2021-07-22 11:03:05 +02:00
11,
11,
2021-10-08 14:28:56 +02:00
10,
2021-07-22 11:03:05 +02:00
9,
7,
7,
6,
6,
2022-02-14 13:34:42 +01:00
6,
2021-07-22 11:03:05 +02:00
4,
4,
4,
4,
4,
3,
3,
3,
3,
3,
3,
2021-10-08 14:28:56 +02:00
3,
2022-02-14 13:34:42 +01:00
3,
2,
2,
2021-07-22 11:03:05 +02:00
2,
2,
2,
2,
2,
2,
2,
2021-10-08 14:28:56 +02:00
1,
1,
1,
2021-07-22 11:03:05 +02:00
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2022-02-14 13:34:42 +01:00
1,
2021-07-22 11:03:05 +02:00
1
]
},
{
"name": "openDOAR",
"type": "bar",
"visible": "legendonly",
"x": [
"USA",
"JPN",
"GBR",
"DEU",
"ESP",
2021-10-08 14:28:56 +02:00
"PER",
2022-02-14 13:34:42 +01:00
"TUR",
2021-07-22 11:03:05 +02:00
"IDN",
"FRA",
"BRA",
"HRV",
"ITA",
"POL",
"UKR",
"IND",
"COL",
2022-02-14 13:34:42 +01:00
"CAN",
2021-07-22 11:03:05 +02:00
"AUS",
"NLD",
"ARG",
"NOR",
"CHN",
2021-10-08 14:28:56 +02:00
"PRT",
2022-02-14 13:34:42 +01:00
"TWN",
2021-07-22 11:03:05 +02:00
"MEX",
"RUS",
2022-02-14 13:34:42 +01:00
"SWE",
2021-10-08 14:28:56 +02:00
"SRB",
"ZAF",
2022-02-14 13:34:42 +01:00
"AUT",
2021-10-08 14:28:56 +02:00
"HUN",
2022-02-14 13:34:42 +01:00
"KEN",
2021-07-22 11:03:05 +02:00
"GRC",
2022-02-14 13:34:42 +01:00
"BLR",
2021-07-22 11:03:05 +02:00
"CHE",
2022-02-14 13:34:42 +01:00
"ECU",
"KOR",
2021-07-22 11:03:05 +02:00
"BEL",
"NGA",
"IRL",
2021-10-08 14:28:56 +02:00
"CHL",
2022-02-14 13:34:42 +01:00
"CZE",
2021-07-22 11:03:05 +02:00
"MYS",
"FIN",
"DZA",
"NZL",
"LTU",
"IRN",
"THA",
"VEN",
"LKA",
"BGD",
2022-02-14 13:34:42 +01:00
"DNK",
2021-07-22 11:03:05 +02:00
"CUB",
2022-02-14 13:34:42 +01:00
"TZA",
2021-07-22 11:03:05 +02:00
"SVN",
2021-10-08 14:28:56 +02:00
"SDN",
2022-02-14 13:34:42 +01:00
"KAZ",
"MDA",
2021-07-22 11:03:05 +02:00
"SAU",
2022-02-14 13:34:42 +01:00
"UGA",
2021-07-22 11:03:05 +02:00
"NIC",
"ZWE",
"BGR",
2022-02-14 13:34:42 +01:00
"URY",
2021-07-22 11:03:05 +02:00
"HKG",
2022-02-14 13:34:42 +01:00
"CRI",
2021-07-22 11:03:05 +02:00
"PHL",
2022-02-14 13:34:42 +01:00
"SLV",
2021-07-22 11:03:05 +02:00
"EGY",
2021-10-08 14:28:56 +02:00
"EST",
2021-07-22 11:03:05 +02:00
"PSE",
2021-10-08 14:28:56 +02:00
"SGP",
"JAM",
"CYP",
2022-02-14 13:34:42 +01:00
"GHA",
"PAN",
"ROU",
2021-07-22 11:03:05 +02:00
"ETH",
2021-10-08 14:28:56 +02:00
"MKD",
"ARE",
"LVA",
2022-02-14 13:34:42 +01:00
"HND",
"PAK",
2021-10-08 14:28:56 +02:00
"SEN",
"SVK",
2021-07-22 11:03:05 +02:00
"ISL",
2021-10-08 14:28:56 +02:00
"DOM",
2022-02-14 13:34:42 +01:00
"LUX",
2021-10-08 14:28:56 +02:00
"LBN",
2021-07-22 11:03:05 +02:00
"GEO",
"ZMB",
2022-02-14 13:34:42 +01:00
"BOL",
"LBY",
"IRQ",
"MMR",
"MAR",
"BWA",
"FJI",
2021-07-22 11:03:05 +02:00
"AZE",
"LSO",
2022-02-14 13:34:42 +01:00
"NAM",
"ARM",
"RWA",
2021-07-22 11:03:05 +02:00
"PRY",
"BIH",
"TUN",
2022-02-14 13:34:42 +01:00
"MOZ",
"CPV",
2021-07-22 11:03:05 +02:00
"UMI",
"TTO",
2022-02-14 13:34:42 +01:00
"TJK",
"VNM",
2021-07-22 11:03:05 +02:00
"AFG",
"SOM",
"QAT",
"PRI",
"NPL",
"NCL",
"MWI",
"MLT",
"ALA",
2022-02-14 13:34:42 +01:00
"KWT",
2021-07-22 11:03:05 +02:00
"KGZ",
"GTM",
"GLP",
"CMR",
"AND",
"ALB",
2022-02-14 13:34:42 +01:00
"LAO"
2021-07-22 11:03:05 +02:00
],
"y": [
2022-02-14 13:34:42 +01:00
919,
681,
317,
281,
178,
173,
172,
163,
161,
154,
2021-07-22 11:03:05 +02:00
148,
2022-02-14 13:34:42 +01:00
140,
128,
106,
102,
100,
2021-07-22 11:03:05 +02:00
99,
2021-10-08 14:28:56 +02:00
89,
2022-02-14 13:34:42 +01:00
74,
2021-10-08 14:28:56 +02:00
73,
2021-07-22 11:03:05 +02:00
67,
2022-02-14 13:34:42 +01:00
64,
2021-10-08 14:28:56 +02:00
62,
2021-07-22 11:03:05 +02:00
60,
2021-10-08 14:28:56 +02:00
51,
2021-07-22 11:03:05 +02:00
50,
2022-02-14 13:34:42 +01:00
50,
48,
48,
47,
2021-10-08 14:28:56 +02:00
45,
2021-07-22 11:03:05 +02:00
44,
2022-02-14 13:34:42 +01:00
39,
38,
2021-07-22 11:03:05 +02:00
38,
2021-10-08 14:28:56 +02:00
38,
2021-07-22 11:03:05 +02:00
37,
33,
30,
30,
27,
2021-10-08 14:28:56 +02:00
27,
2021-07-22 11:03:05 +02:00
25,
2022-02-14 13:34:42 +01:00
23,
2021-07-22 11:03:05 +02:00
20,
19,
2022-02-14 13:34:42 +01:00
19,
2021-07-22 11:03:05 +02:00
18,
17,
16,
16,
15,
15,
14,
14,
2022-02-14 13:34:42 +01:00
13,
2021-07-22 11:03:05 +02:00
12,
12,
12,
12,
2021-10-08 14:28:56 +02:00
12,
2021-07-22 11:03:05 +02:00
11,
11,
11,
2022-02-14 13:34:42 +01:00
10,
10,
2021-07-22 11:03:05 +02:00
10,
9,
9,
9,
9,
8,
7,
6,
6,
6,
6,
6,
5,
5,
5,
2021-10-08 14:28:56 +02:00
5,
5,
2021-07-22 11:03:05 +02:00
4,
4,
4,
4,
4,
4,
2022-02-14 13:34:42 +01:00
4,
4,
2021-07-22 11:03:05 +02:00
3,
3,
3,
3,
3,
2021-10-08 14:28:56 +02:00
3,
3,
2021-07-22 11:03:05 +02:00
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2022-02-14 13:34:42 +01:00
1,
2021-07-22 11:03:05 +02:00
1
]
},
{
"name": "ROAR",
"type": "bar",
"visible": "legendonly",
"x": [
"USA",
"DEU",
2021-10-08 14:28:56 +02:00
"GBR",
2021-07-22 11:03:05 +02:00
"JPN",
"ESP",
"BRA",
"IDN",
2021-10-08 14:28:56 +02:00
"TUR",
2021-07-22 11:03:05 +02:00
"PER",
"IND",
"COL",
2022-02-14 13:34:42 +01:00
"POL",
2021-07-22 11:03:05 +02:00
"UKR",
"FRA",
"ITA",
"CHN",
2021-10-08 14:28:56 +02:00
"CAN",
2021-07-22 11:03:05 +02:00
"AUS",
"TWN",
"SWE",
"ARG",
2021-10-08 14:28:56 +02:00
"RUS",
2021-07-22 11:03:05 +02:00
"PRT",
"NOR",
"MEX",
2021-10-08 14:28:56 +02:00
"KOR",
2021-07-22 11:03:05 +02:00
"ZAF",
"HUN",
2022-02-14 13:34:42 +01:00
"NLD",
2021-07-22 11:03:05 +02:00
"MYS",
"GRC",
"SRB",
"BEL",
"BLR",
"ECU",
"KEN",
2021-10-08 14:28:56 +02:00
"CHE",
2021-07-22 11:03:05 +02:00
"IRL",
"AUT",
2021-10-08 14:28:56 +02:00
"CHL",
2021-07-22 11:03:05 +02:00
"VEN",
"FIN",
"NZL",
2021-10-08 14:28:56 +02:00
"MDA",
2022-02-14 13:34:42 +01:00
"NGA",
2021-07-22 11:03:05 +02:00
"ROU",
"DNK",
2021-10-08 14:28:56 +02:00
"DZA",
2021-07-22 11:03:05 +02:00
"CZE",
"IRN",
2021-10-08 14:28:56 +02:00
"PHL",
2022-02-14 13:34:42 +01:00
"THA",
2021-07-22 11:03:05 +02:00
"CUB",
"SDN",
2021-10-08 14:28:56 +02:00
"SLV",
2021-07-22 11:03:05 +02:00
"ZWE",
"EGY",
2021-10-08 14:28:56 +02:00
"BGD",
2022-02-14 13:34:42 +01:00
"BGR",
2021-10-08 14:28:56 +02:00
"LTU",
2021-07-22 11:03:05 +02:00
"SVN",
2021-10-08 14:28:56 +02:00
"KAZ",
2021-07-22 11:03:05 +02:00
"HKG",
"SAU",
2022-02-14 13:34:42 +01:00
"CRI",
"TZA",
2021-10-08 14:28:56 +02:00
"UGA",
2022-02-14 13:34:42 +01:00
"PAK",
2021-10-08 14:28:56 +02:00
"PSE",
2021-07-22 11:03:05 +02:00
"NIC",
2022-02-14 13:34:42 +01:00
"CYP",
2021-10-08 14:28:56 +02:00
"URY",
2021-07-22 11:03:05 +02:00
"HRV",
2022-02-14 13:34:42 +01:00
"SGP",
"EST",
2021-10-08 14:28:56 +02:00
"GHA",
2021-07-22 11:03:05 +02:00
"AZE",
2021-10-08 14:28:56 +02:00
"PAN",
2021-07-22 11:03:05 +02:00
"LVA",
2021-10-08 14:28:56 +02:00
"JAM",
2021-07-22 11:03:05 +02:00
"BWA",
"KGZ",
2022-02-14 13:34:42 +01:00
"DOM",
2021-10-08 14:28:56 +02:00
"MAR",
"IRQ",
"LBN",
2021-07-22 11:03:05 +02:00
"ETH",
"BOL",
"NPL",
2021-10-08 14:28:56 +02:00
"BIH",
2022-02-14 13:34:42 +01:00
"MKD",
"LKA",
2021-07-22 11:03:05 +02:00
"LBY",
2021-10-08 14:28:56 +02:00
"SVK",
2022-02-14 13:34:42 +01:00
"GUF",
2021-10-08 14:28:56 +02:00
"TUN",
2022-02-14 13:34:42 +01:00
"NAM",
"ARM",
"SEN",
"ARE",
2021-10-08 14:28:56 +02:00
"GLP",
"MOZ",
"FJI",
"DMA",
2021-07-22 11:03:05 +02:00
"LSO",
"LUX",
2022-02-14 13:34:42 +01:00
"MTQ",
2021-07-22 11:03:05 +02:00
"GEO",
2021-10-08 14:28:56 +02:00
"ISL",
2021-07-22 11:03:05 +02:00
"UMI",
2021-10-08 14:28:56 +02:00
"ATF",
2022-02-14 13:34:42 +01:00
"WSM",
2021-10-08 14:28:56 +02:00
"ISR",
2021-07-22 11:03:05 +02:00
"SYR",
"SOM",
2021-10-08 14:28:56 +02:00
"CMR",
2021-07-22 11:03:05 +02:00
"RWA",
"QAT",
2021-10-08 14:28:56 +02:00
"PYF",
2021-07-22 11:03:05 +02:00
"PRK",
2021-10-08 14:28:56 +02:00
"PRI",
"NCL",
"MWI",
2021-07-22 11:03:05 +02:00
"MLT",
2021-10-08 14:28:56 +02:00
"HND",
2021-07-22 11:03:05 +02:00
"ALB",
"AFG"
],
"y": [
2022-02-14 13:34:42 +01:00
896,
263,
2021-10-08 14:28:56 +02:00
262,
2022-02-14 13:34:42 +01:00
244,
202,
186,
181,
167,
161,
133,
2021-10-08 14:28:56 +02:00
131,
128,
2022-02-14 13:34:42 +01:00
120,
102,
2021-10-08 14:28:56 +02:00
99,
2021-07-22 11:03:05 +02:00
97,
2021-10-08 14:28:56 +02:00
97,
84,
83,
2021-07-22 11:03:05 +02:00
76,
2022-02-14 13:34:42 +01:00
72,
2021-07-22 11:03:05 +02:00
68,
2022-02-14 13:34:42 +01:00
62,
2021-07-22 11:03:05 +02:00
59,
2022-02-14 13:34:42 +01:00
54,
2021-10-08 14:28:56 +02:00
53,
52,
2021-07-22 11:03:05 +02:00
46,
2022-02-14 13:34:42 +01:00
46,
42,
2021-10-08 14:28:56 +02:00
42,
2021-07-22 11:03:05 +02:00
41,
37,
35,
2022-02-14 13:34:42 +01:00
32,
30,
2021-10-08 14:28:56 +02:00
30,
2021-07-22 11:03:05 +02:00
27,
2021-10-08 14:28:56 +02:00
27,
27,
24,
2021-07-22 11:03:05 +02:00
23,
22,
2022-02-14 13:34:42 +01:00
19,
17,
2021-07-22 11:03:05 +02:00
17,
17,
16,
16,
16,
15,
2021-10-08 14:28:56 +02:00
15,
2021-07-22 11:03:05 +02:00
14,
13,
13,
12,
12,
11,
11,
11,
10,
10,
10,
2021-10-08 14:28:56 +02:00
10,
2021-07-22 11:03:05 +02:00
9,
9,
8,
7,
7,
7,
7,
7,
7,
6,
6,
2022-02-14 13:34:42 +01:00
6,
2021-07-22 11:03:05 +02:00
5,
5,
2021-10-08 14:28:56 +02:00
5,
2021-07-22 11:03:05 +02:00
4,
4,
4,
4,
4,
3,
3,
3,
3,
3,
3,
3,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2,
2021-10-08 14:28:56 +02:00
2,
2,
2,
2022-02-14 13:34:42 +01:00
2,
2021-10-08 14:28:56 +02:00
1,
1,
2021-07-22 11:03:05 +02:00
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1
]
},
{
"name": "FAIRsharing",
"type": "bar",
"visible": "legendonly",
"x": [
"USA",
"GBR",
"DEU",
"FRA",
"CHE",
"NLD",
2022-03-17 10:33:11 +01:00
"CHN",
2021-07-22 11:03:05 +02:00
"ITA",
"ESP",
2022-03-17 10:33:11 +01:00
"CAN",
2021-10-08 14:28:56 +02:00
"BEL",
2022-03-17 10:33:11 +01:00
"JPN",
2021-07-22 11:03:05 +02:00
"SWE",
"NOR",
"EU",
2022-03-17 10:33:11 +01:00
"CZE",
2021-10-08 14:28:56 +02:00
"DNK",
"AUT",
2022-03-17 10:33:11 +01:00
"AUS",
"FIN",
"PRT",
2021-10-08 14:28:56 +02:00
"IRL",
2021-07-22 11:03:05 +02:00
"ISR",
"HUN",
"GRC",
2022-03-17 10:33:11 +01:00
"LUX",
2021-07-22 11:03:05 +02:00
"MLT",
2022-03-17 10:33:11 +01:00
"HRV",
2021-07-22 11:03:05 +02:00
"LTU",
2021-10-08 14:28:56 +02:00
"ISL",
2021-07-22 11:03:05 +02:00
"SVK",
"MNE",
"IND",
"POL",
"SGP",
2021-10-08 14:28:56 +02:00
"KOR",
2021-07-22 11:03:05 +02:00
"RUS",
2022-03-17 10:33:11 +01:00
"TWN",
2021-07-22 11:03:05 +02:00
"MEX",
2022-03-17 10:33:11 +01:00
"ZAF",
2021-07-22 11:03:05 +02:00
"BRA",
2022-03-17 10:33:11 +01:00
"NZL",
2021-07-22 11:03:05 +02:00
"SAU",
"BGR",
2022-03-17 10:33:11 +01:00
"TUR",
"ARG",
"HKG",
2021-10-08 14:28:56 +02:00
"EST",
"PAK",
2022-03-17 10:33:11 +01:00
"ROU",
"UGA",
"THA",
2021-07-22 11:03:05 +02:00
"AQ",
2022-03-17 10:33:11 +01:00
"MAR",
"CRI",
"CYP",
"MLI",
"FRO",
2021-07-22 11:03:05 +02:00
"BEN",
2022-03-17 10:33:11 +01:00
"VNM",
2021-10-08 14:28:56 +02:00
"BIH",
2022-03-17 10:33:11 +01:00
"URY",
2021-07-22 11:03:05 +02:00
"CHL",
"CMR",
"COL",
2022-03-17 10:33:11 +01:00
"TGO",
2021-07-22 11:03:05 +02:00
"EGY",
2022-03-17 10:33:11 +01:00
"SLV",
2021-07-22 11:03:05 +02:00
"ETH",
"GRL",
2022-03-17 10:33:11 +01:00
"MOZ",
"HND",
2021-07-22 11:03:05 +02:00
"IDN",
"ARE",
2022-03-17 10:33:11 +01:00
"PAN",
2021-07-22 11:03:05 +02:00
"KEN",
"LVA",
"MDG",
2022-03-17 10:33:11 +01:00
"NIC",
"NGA",
2021-07-22 11:03:05 +02:00
"NER",
2022-03-17 10:33:11 +01:00
"MWI",
"MRT",
2021-07-22 11:03:05 +02:00
"ZWE"
],
"y": [
2022-03-17 10:33:11 +01:00
710,
260,
199,
169,
2021-07-22 11:03:05 +02:00
114,
2022-03-17 10:33:11 +01:00
103,
102,
97,
89,
89,
2021-10-08 14:28:56 +02:00
85,
2022-03-17 10:33:11 +01:00
84,
72,
70,
70,
70,
69,
63,
63,
2021-07-22 11:03:05 +02:00
62,
2022-03-17 10:33:11 +01:00
61,
2021-07-22 11:03:05 +02:00
59,
58,
2021-10-08 14:28:56 +02:00
58,
2022-03-17 10:33:11 +01:00
56,
50,
2021-10-08 14:28:56 +02:00
50,
49,
49,
49,
49,
48,
2022-03-17 10:33:11 +01:00
35,
12,
2021-10-08 14:28:56 +02:00
11,
2021-07-22 11:03:05 +02:00
11,
10,
2022-03-17 10:33:11 +01:00
10,
2021-07-22 11:03:05 +02:00
9,
2021-10-08 14:28:56 +02:00
9,
2021-07-22 11:03:05 +02:00
8,
8,
6,
2022-03-17 10:33:11 +01:00
5,
2021-07-22 11:03:05 +02:00
3,
3,
3,
3,
2,
2,
2,
2,
2,
2,
2,
2,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
1,
2021-10-08 14:28:56 +02:00
1,
2021-07-22 11:03:05 +02:00
1
]
}
],
"layout": {
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
2022-02-14 13:34:42 +01:00
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Country coverage"
},
"xaxis": {
"tickangle": 45,
"tickfont": {
"size": 12
}
}
}
},
"text/html": [
2022-03-17 10:33:11 +01:00
"<div> <div id=\"af3c682d-68c0-4031-ac7c-9c0440a1bd42\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"af3c682d-68c0-4031-ac7c-9c0440a1bd42\")) { Plotly.newPlot( \"af3c682d-68c0-4031-ac7c-9c0440a1bd42\", [{\"name\":\"re3data\",\"x\":[\"USA\",\"DEU\",\"CAN\",\"GBR\",\"EEC\",\"AAA\",\"FRA\",\"AUS\",\"CHE\",\"JPN\",\"NLD\",\"ESP\",\"IND\",\"CHN\",\"ITA\",\"NOR\",\"AUT\",\"SWE\",\"BEL\",\"DNK\",\"RUS\",\"POL\",\"GRC\",\"MEX\",\"IRL\",\"ZAF\",\"CZE\",\"TWN\",\"NZL\",\"BRA\",\"PRT\",\"FIN\",\"EST\",\"KOR\",\"COL\",\"SRB\",\"ISR\",\"LTU\",\"SGP\",\"ARG\",\"HUN\",\"TUR\",\"SVN\",\"ISL\",\"KEN\",\"HKG\",\"UKR\",\"ROU\",\"IDN\",\"SVK\",\"LUX\",\"PAK\",\"PER\",\"LVA\",\"THA\",\"CYP\",\"CHL\",\"HRV\",\"GRL\",\"CMR\",\"SDN\",\"VNM\",\"GHA\",\"LBN\",\"BFA\",\"BEN\",\"PAN\",\"MKD\",\"PHL\",\"BIH\",\"FJI\",\"ETH\",\"KAZ\",\"CIV\",\"LAO\",\"TUN\",\"MWI\",\"LKA\",\"NAM\",\"NCL\",\"SEN\",\"AZE\",\"SAU\",\"PYF\",\"EGY\"],\"y\":[2993,1154,601,577,404,349,279,240,134,133,131,100,87,76,66,62,61,60,46,40,36,34,31,24,22,22,22,19,19,19,18,18,14,14,13,13,13,11,11,10,9,7,7,6,6,6,4,4,4,4,4,3,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],\"type\":\"bar\"},{\"name\":\"openDOAR\",\"visible\":\"legendonly\",\"x\":[\"USA\",\"JPN\",\"GBR\",\"DEU\",\"ESP\",\"PER\",\"TUR\",\"IDN\",\"FRA\",\"BRA\",\"HRV\",\"ITA\",\"POL\",\"UKR\",\"IND\",\"COL\",\"CAN\",\"AUS\",\"NLD\",\"ARG\",\"NOR\",\"CHN\",\"PRT\",\"TWN\",\"MEX\",\"RUS\",\"SWE\",\"SRB\",\"ZAF\",\"AUT\",\"HUN\",\"KEN\",\"GRC\",\"BLR\",\"CHE\",\"ECU\",\"KOR\",\"BEL\",\"NGA\",\"IRL\",\"CHL\",\"CZE\",\"MYS\",\"FIN\",\"DZA\",\"NZL\",\"LTU\",\"IRN\",\"THA\",\"VEN\",\"LKA\",\"BGD\",\"DNK\",\"CUB\",\"TZA\",\"SVN\",\"SDN\",\"KAZ\",\"MDA\",\"SAU\",\"UGA\",\"NIC\",\"ZWE\",\"BGR\",\"URY\",\"HKG\",\"CRI\",\"PHL\",\"SLV\",\"EGY\",\"EST\",\"PSE\",\"SGP\",\"JAM\",\"CYP\",\"GHA\",\
"PAN\",\"ROU\",\"ETH\",\"MKD\",\"ARE\",\"LVA\",\"HND\",\"PAK\",\"SEN\",\"SVK\",\"ISL\",\"DOM\",\"LUX\",\"LBN\",\"GEO\",\"ZMB\",\"BOL\",\"LBY\",\"IRQ\",\"MMR\",\"MAR\",\"BWA\",\"FJI\",\"AZE\",\"LSO\",\"NAM\",\"ARM\",\"RWA\",\"PRY\",\"BIH\",\"TUN\",\"MOZ\",\"CPV\",\"UMI\",\"TTO\",\"TJK\",\"VNM\",\"AFG\",\"SOM\",\"QAT\",\"PRI\",\"NPL\",\"NCL\",\"MWI\",\"MLT\",\"ALA\",\"KWT\",\"KGZ\",\"GTM\",\"GLP\",\"CMR\",\"AND\",\"ALB\",\"LAO\"],\"y\":[919,681,317,281,178,173,172,163,161,154,148,140,128,106,102,100,99,89,74,73,67,64,62,60,51,50,50,48,48,47,45,44,39,38,38,38,37,33,30,30,27,27,25,23,20,19,19,18,17,16,16,15,15,14,14,13,12,12,12,12,12,11,11,11,10,10,10,9,9,9,9,8,7,6,6,6,6,6,5,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],\"type\":\"bar\"},{\"name\":\"ROAR\",\"visible\":\"legendonly\",\"x\":[\"USA\",\"DEU\",\"GBR\",\"JPN\",\"ESP\",\"BRA\",\"IDN\",\"TUR\",\"PER\",\"IND\",\"COL\",\"POL\",\"UKR\",\"FRA\",\"ITA\",\"CHN\",\"CAN\",\"AUS\",\"TWN\",\"SWE\",\"ARG\",\"RUS\",\"PRT\",\"NOR\",\"MEX\",\"KOR\",\"ZAF\",\"HUN\",\"NLD\",\"MYS\",\"GRC\",\"SRB\",\"BEL\",\"BLR\",\"ECU\",\"KEN\",\"CHE\",\"IRL\",\"AUT\",\"CHL\",\"VEN\",\"FIN\",\"NZL\",\"MDA\",\"NGA\",\"ROU\",\"DNK\",\"DZA\",\"CZE\",\"IRN\",\"PHL\",\"THA\",\"CUB\",\"SDN\",\"SLV\",\"ZWE\",\"EGY\",\"BGD\",\"BGR\",\"LTU\",\"SVN\",\"KAZ\",\"HKG\",\"SAU\",\"CRI\",\"TZA\",\"UGA\",\"PAK\",\"PSE\",\"NIC\",\"CYP\",\"URY\",\"HRV\",\"SGP\",\"EST\",\"GHA\",\"AZE\",\"PAN\",\"LVA\",\"JAM\",\"BWA\",\"KGZ\",\"DOM\",\"MAR\",\"IRQ\",\"LBN\",\"ETH\",\"BOL\",\"NPL\",\"BIH\",\"MKD\",\"LKA\",\"LBY\",\"SVK\",\"GUF\",\"TUN\",\"NAM\",\"ARM\",\"SEN\",\"ARE\",\"GLP\",\"MOZ\",\"FJI\",\"DMA\",\"LSO\",\"LUX\",\"MTQ\",\"GEO\",\"ISL\",\"UMI\",\"ATF\",\"WSM\",\"ISR\",\"SYR\",\"SOM\",\"CMR\",\"RWA\",\"QAT\",\"PYF\",\"PRK\",\"PRI\",\"NCL\",\"MWI\",\"MLT\",\"HND\",\"ALB\",\"AFG\"],\"y\":[896,263,262,244,202,186,181,167,161,13
2021-07-22 11:03:05 +02:00
" \n",
2022-03-17 10:33:11 +01:00
"var gd = document.getElementById('af3c682d-68c0-4031-ac7c-9c0440a1bd42');\n",
2021-07-22 11:03:05 +02:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"data1 = re3data_institutions.groupby('institutionCountry')[['orgIdentifier']].count().sort_values('orgIdentifier', ascending=False)\n",
"data2 = opendoar_institutions.groupby('country')[['system_metadata.id']].count().sort_values('system_metadata.id', ascending=False)\n",
"data3 = roar_institutions.groupby('location_country')[['eprintid']].count().sort_values('eprintid', ascending=False)\n",
"data4 = fairsharing_countries.groupby('countrycode')[['id']].count().sort_values('id', ascending=False)\n",
2021-07-22 11:03:05 +02:00
"\n",
"plot = [\n",
" go.Bar(\n",
" x=data1.index,\n",
2021-10-08 14:28:56 +02:00
" y=data1['orgIdentifier'],\n",
2021-07-22 11:03:05 +02:00
" name='re3data'\n",
" ),\n",
" go.Bar(\n",
" x=data2.index,\n",
2021-10-08 14:28:56 +02:00
" y=data2['system_metadata.id'],\n",
2021-07-22 11:03:05 +02:00
" name='openDOAR',\n",
" visible = 'legendonly'\n",
" ),\n",
" go.Bar(\n",
" x=data3.index,\n",
" y=data3['eprintid'],\n",
" name='ROAR',\n",
" visible = 'legendonly'\n",
" ),\n",
" go.Bar(\n",
" x=data4.index,\n",
2021-10-08 14:28:56 +02:00
" y=data4['id'],\n",
2021-07-22 11:03:05 +02:00
" name='FAIRsharing',\n",
" visible = 'legendonly'\n",
" )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
" title='Country coverage',\n",
" xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"go.Figure(plot, layout).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Continental coverage"
]
},
{
"cell_type": "code",
2022-03-17 10:33:11 +01:00
"execution_count": 34,
2021-07-22 11:03:05 +02:00
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"fill": "toself",
"name": "re3data",
"r": [
2022-02-14 13:34:42 +01:00
45,
389,
3380,
3623,
262,
48
2021-07-22 11:03:05 +02:00
],
"theta": [
"AF",
"AS",
"EU",
"NA",
"OC",
"SA"
],
"type": "scatterpolar"
},
{
"fill": "toself",
"name": "OpenDOAR",
"r": [
2022-02-14 13:34:42 +01:00
242,
1469,
2243,
1139,
2021-10-08 14:28:56 +02:00
111,
2022-02-14 13:34:42 +01:00
596
2021-07-22 11:03:05 +02:00
],
"theta": [
"AF",
"AS",
"EU",
"NA",
"OC",
"SA"
],
"type": "scatterpolar"
},
{
"fill": "toself",
"name": "ROAR",
"r": [
2022-02-14 13:34:42 +01:00
202,
1145,
1928,
1111,
2021-10-08 14:28:56 +02:00
111,
2022-02-14 13:34:42 +01:00
645
2021-07-22 11:03:05 +02:00
],
"theta": [
"AF",
"AS",
"EU",
"NA",
"OC",
"SA"
],
"type": "scatterpolar"
},
{
"fill": "toself",
"name": "FAIRsharing",
"r": [
27,
2022-03-17 10:33:11 +01:00
332,
2205,
815,
71,
14
2021-07-22 11:03:05 +02:00
],
"theta": [
"AF",
"AS",
"EU",
"NA",
"OC",
"SA"
],
"type": "scatterpolar"
}
],
"layout": {
"polar": {
"radialaxis": {
"visible": true
}
},
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
2022-02-14 13:34:42 +01:00
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
2022-02-14 13:34:42 +01:00
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
2021-07-22 11:03:05 +02:00
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
}
}
},
"text/html": [
2022-03-17 10:33:11 +01:00
"<div> <div id=\"3f3d3a88-e456-4fab-8f14-25a750a93fe6\" class=\"plotly-graph-div\" style=\"height:525px; width:100%;\"></div> <script type=\"text/javascript\"> require([\"plotly\"], function(Plotly) { window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById(\"3f3d3a88-e456-4fab-8f14-25a750a93fe6\")) { Plotly.newPlot( \"3f3d3a88-e456-4fab-8f14-25a750a93fe6\", [{\"fill\":\"toself\",\"name\":\"re3data\",\"r\":[45,389,3380,3623,262,48],\"theta\":[\"AF\",\"AS\",\"EU\",\"NA\",\"OC\",\"SA\"],\"type\":\"scatterpolar\"},{\"fill\":\"toself\",\"name\":\"OpenDOAR\",\"r\":[242,1469,2243,1139,111,596],\"theta\":[\"AF\",\"AS\",\"EU\",\"NA\",\"OC\",\"SA\"],\"type\":\"scatterpolar\"},{\"fill\":\"toself\",\"name\":\"ROAR\",\"r\":[202,1145,1928,1111,111,645],\"theta\":[\"AF\",\"AS\",\"EU\",\"NA\",\"OC\",\"SA\"],\"type\":\"scatterpolar\"},{\"fill\":\"toself\",\"name\":\"FAIRsharing\",\"r\":[27,332,2205,815,71,14],\"theta\":[\"AF\",\"AS\",\"EU\",\"NA\",\"OC\",\"SA\"],\"type\":\"scatterpolar\"}], 
{\"polar\":{\"radialaxis\":{\"visible\":true}},\"template\":{\"data\":{\"bar\":[{\"error_x\":{\"color\":\"#2a3f5f\"},\"error_y\":{\"color\":\"#2a3f5f\"},\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"bar\"}],\"barpolar\":[{\"marker\":{\"line\":{\"color\":\"#E5ECF6\",\"width\":0.5},\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"barpolar\"}],\"carpet\":[{\"aaxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"baxis\":{\"endlinecolor\":\"#2a3f5f\",\"gridcolor\":\"white\",\"linecolor\":\"white\",\"minorgridcolor\":\"white\",\"startlinecolor\":\"#2a3f5f\"},\"type\":\"carpet\"}],\"choropleth\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"choropleth\"}],\"contour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"contour\"}],\"contourcarpet\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"type\":\"contourcarpet\"}],\"heatmap\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmap\"}],\"heatmapgl\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.444
4444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"heatmapgl\"}],\"histogram\":[{\"marker\":{\"pattern\":{\"fillmode\":\"overlay\",\"size\":10,\"solidity\":0.2}},\"type\":\"histogram\"}],\"histogram2d\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.2222222222222222,\"#7201a8\"],[0.3333333333333333,\"#9c179e\"],[0.4444444444444444,\"#bd3786\"],[0.5555555555555556,\"#d8576b\"],[0.6666666666666666,\"#ed7953\"],[0.7777777777777778,\"#fb9f3a\"],[0.8888888888888888,\"#fdca26\"],[1.0,\"#f0f921\"]],\"type\":\"histogram2d\"}],\"histogram2dcontour\":[{\"colorbar\":{\"outlinewidth\":0,\"ticks\":\"\"},\"colorscale\":[[0.0,\"#0d0887\"],[0.1111111111111111,\"#46039f\"],[0.22222222222
2021-07-22 11:03:05 +02:00
" \n",
2022-03-17 10:33:11 +01:00
"var gd = document.getElementById('3f3d3a88-e456-4fab-8f14-25a750a93fe6');\n",
2021-07-22 11:03:05 +02:00
"var x = new MutationObserver(function (mutations, observer) {{\n",
" var display = window.getComputedStyle(gd).display;\n",
" if (!display || display === 'none') {{\n",
" console.log([gd, 'removed!']);\n",
" Plotly.purge(gd);\n",
" observer.disconnect();\n",
" }}\n",
"}});\n",
"\n",
"// Listen for the removal of the full notebook cells\n",
"var notebookContainer = gd.closest('#notebook-container');\n",
"if (notebookContainer) {{\n",
" x.observe(notebookContainer, {childList: true});\n",
"}}\n",
"\n",
"// Listen for the clearing of the current output cell\n",
"var outputEl = gd.closest('.output');\n",
"if (outputEl) {{\n",
" x.observe(outputEl, {childList: true});\n",
"}}\n",
"\n",
" }) }; }); </script> </div>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2021-10-08 14:28:56 +02:00
"def continent_trace(name, frame, continent_col, id_col):\n",
"    \"\"\"Build one radar-chart trace of per-continent counts for a registry.\n",
"\n",
"    Groups `frame` by `continent_col` and counts the non-null `id_col`\n",
"    values in each group. The counts are used as the radial values and the\n",
"    group keys as the angular categories; `name` is the legend label.\n",
"    \"\"\"\n",
"    # Bracket access works for every column name, including 'id' and the\n",
"    # dotted 'system_metadata.id', which attribute access cannot express.\n",
"    per_continent = frame.groupby(continent_col)[id_col].count()\n",
"    return go.Scatterpolar(\n",
"        r=per_continent,\n",
"        theta=per_continent.index,\n",
"        fill='toself',\n",
"        name=name)\n",
"\n",
"# (legend label, source frame, continent column, id column to count)\n",
"registry_sources = [\n",
"    ('re3data', re3data_institutions, 'org_continent', 'orgIdentifier'),\n",
"    ('OpenDOAR', opendoar_institutions, 'org_continent', 'system_metadata.id'),\n",
"    ('ROAR', roar_institutions, 'continent', 'eprintid'),\n",
"    ('FAIRsharing', fairsharing_countries, 'continent', 'id'),\n",
"]\n",
"\n",
"# One trace per registry, in the same order as the table above.\n",
"traces = [continent_trace(*source) for source in registry_sources]\n",
"\n",
"layout = go.Layout(polar=dict(radialaxis=dict(visible=True)))\n",
"\n",
"go.Figure(traces, layout).show()"
]
},
2022-02-14 13:34:42 +01:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
2021-07-22 11:03:05 +02:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
2022-02-14 13:34:42 +01:00
"display_name": "Python 3 (ipykernel)",
2021-07-22 11:03:05 +02:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}