{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "import reverse_geocoder as rg\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import pycountry_convert\n", "\n", "import matplotlib.pyplot as plt\n", "from matplotlib_venn import venn2, venn2_circles\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "def country_to_countrycode(country):\n",
 "    \"\"\"Return the alpha-3 country code for a country name.\n",
 "\n",
 "    Returns ``np.nan`` when ``country`` is missing or the lookup fails.\n",
 "    \"\"\"\n",
 "    if pd.isna(country):\n",
 "        return np.nan\n",
 "    try:\n",
 "        return pycountry_convert.country_name_to_country_alpha3(country)\n",
 "    except Exception:\n",
 "        # Best-effort: a value that cannot be resolved becomes NaN. Catching\n",
 "        # Exception instead of using a bare except lets KeyboardInterrupt and\n",
 "        # SystemExit through, so a long .apply() can still be interrupted.\n",
 "        return np.nan\n",
 "\n",
 "\n",
 "def countrycode_iso2_to_countrycode_iso3(country):\n",
 "    \"\"\"Return the alpha-3 country code for an alpha-2 country code.\n",
 "\n",
 "    The code is first resolved to a country name and then to alpha-3.\n",
 "    Returns ``np.nan`` when ``country`` is missing or either lookup fails.\n",
 "    \"\"\"\n",
 "    if pd.isna(country):\n",
 "        return np.nan\n",
 "    try:\n",
 "        country_name = pycountry_convert.country_alpha2_to_country_name(country)\n",
 "        return pycountry_convert.country_name_to_country_alpha3(country_name)\n",
 "    except Exception:\n",
 "        # Best-effort conversion; interrupts and SystemExit still propagate.\n",
 "        return np.nan\n",
 "\n",
 "\n",
 "def countrycode_to_continent(country_code):\n",
 "    \"\"\"Return the continent code for an alpha-3 country code.\n",
 "\n",
 "    The code is first converted to alpha-2 and then to a continent code.\n",
 "    Returns ``np.nan`` when ``country_code`` is missing or either lookup fails.\n",
 "    \"\"\"\n",
 "    if pd.isna(country_code):\n",
 "        return np.nan\n",
 "    try:\n",
 "        alpha2_code = pycountry_convert.country_alpha3_to_country_alpha2(country_code)\n",
 "        return pycountry_convert.country_alpha2_to_continent_code(alpha2_code)\n",
 "    except Exception:\n",
 "        # Best-effort conversion; interrupts and SystemExit still propagate.\n",
 "        return np.nan"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**re3data**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orgIdentifierrepositoryNamerepositoryName.languageadditionalNamerepositoryURLrepositoryIdentifierrepositoryContactdescriptiondescription.languagetypesizestartDateendDaterepositoryLanguagesubjectmissionStatementURLcontentTypeproviderTypekeywordinstitutionpolicydatabaseAccessdatabaseLicensedataAccessdataLicensedataUploadTypedataUploadLicensesoftwareversioningapipidSystemcitationGuidelineURLaidSystemenhancedPublicationqualityManagementcertificatemetadataStandardsyndicationremarksentryDatelastUpdate
0r3d100000001Odum Institute Archive Dataverseeng[]https://dataverse.unc.edu/dataverse/odum[][\"https://dataverse.unc.edu/dataverse/odum#\", ...The Odum Institute Archive Dataverse contains ...eng[disciplinary]{\"size\": \"13 dataverses; 3.050 datasets\", \"upd...NaNNaN[\"eng\"][{'name': '1 Humanities and Social Sciences', ...NaN[{'name': 'Databases', 'scheme': 'parse'}, {'n...[dataProvider][FAIR, Middle East, crime, demography, economy...[{'institutionName': 'Odum Institute for Resea...[{\"policyName\": \"Collection Development Policy...{\"databaseAccessType\": \"open\", \"databaseAcces...[{\"databaseLicenseName\": \"CC0\", \"databaseLicen...[{\"dataAccessType\": \"embargoed\", \"dataAccessRe...[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...restricted[][\"DataVerse\"]NaN{}[\"DOI\"]NaN[]unknownyes[\"other\"][{\"metadataStandardName\": \"DDI - Data Document...{}Odum Dataverse is covered by Thomson Reuters D...2013-06-102021-07-06
1r3d100000002Access to Archival Databaseseng[{'additionalName': 'AAD', 'additionalNameLang...https://aad.archives.gov/aad/[RRID:SCR_010479, RRID:nlx_157752][\"https://www.archives.gov/contact\"]You will find in the Access to Archival Databa...eng[disciplinary]{\"size\": \"\", \"updatedp\": \"\"}1985NaN[\"eng\", \"spa\"][{'name': '1 Humanities and Social Sciences', ...https://www.archives.gov/publications/general-...[{'name': 'Images', 'scheme': 'parse'}, {'name...[dataProvider][US History][{'institutionName': 'The U.S. National Archiv...[{\"policyName\": \"Contribution Policy\", \"policy...{\"databaseAccessType\": \"open\", \"databaseAcces...[][{\"dataAccessType\": \"open\", \"dataAccessRestric...[{\"dataLicenseName\": \"Copyrights\", \"dataLicens...restricted[][\"unknown\"]no{\"api\": \"https://www.archives.gov/developer#to...[\"none\"]https://aad.archives.gov/aad/help/getting-star...[]unknownunknown[][]{\"syndication\": \"http://www.archives.gov/socia...NaN2012-07-042021-05-25
2r3d100000004Datenbank Gesprochenes Deutschdeu[{'additionalName': 'DGD', 'additionalNameLang...https://dgd.ids-mannheim.de/[][\"dgd@ids-mannheim.de\"]The \"Database for Spoken German (DGD)\" is a co...eng[disciplinary]{\"size\": \"34 corpora\", \"updatedp\": \"2020-02-03\"}2012NaN[\"deu\"][{'name': '1 Humanities and Social Sciences', ...https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext...[{'name': 'Audiovisual data', 'scheme': 'parse...[dataProvider, serviceProvider][Australian German, FOLK, German dialects, Pfe...[{'institutionName': 'Institut für Deutsche Sp...[{\"policyName\": \"Erfurter Aufruf zur Sicherung...{\"databaseAccessType\": \"restricted\", \"databas...[][{\"dataAccessType\": \"restricted\", \"dataAccessR...[{\"dataLicenseName\": \"other\", \"dataLicenseURL\"...restricted[][\"other\"]yes{}[\"none\"]http://agd.ids-mannheim.de/konditionen.shtml[]unknownunknown[\"RatSWD\"][]{}NaN2012-07-202020-08-27
3r3d100000005UNC Dataverseeng[{'additionalName': 'University of North Carol...https://dataverse.unc.edu/[][\"https://dataverse.unc.edu/\", \"odumarchive@un...UNC Dataverse is an open-source repository sof...eng[institutional]{\"size\": \"186 dataverses; 25.272 studies; 229....2011NaN[\"eng\"][{'name': '1 Humanities and Social Sciences', ...https://odum.unc.edu/about/mission-vision/[{'name': 'Archived data', 'scheme': 'parse'},...[dataProvider, serviceProvider][FAIR, census, demographic survey, demography,...[{'institutionName': 'Odum Institute for Resea...[{\"policyName\": \"Collection Development Policy...{\"databaseAccessType\": \"open\", \"databaseAcces...[][{\"dataAccessType\": \"open\", \"dataAccessRestric...[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...restricted[{\"dataUploadLicenseName\": \"Data Deposit Form\"...[\"DataVerse\"]yes{\"api\": \"https://guides.dataverse.org/en/lates...[\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"]https://dataverse.org/best-practices/data-cita...[]unknownyes[][{\"metadataStandardName\": \"DDI - Data Document...{}UNC Dataverse is covered by Clarivate Data Cit...2012-07-232021-08-11
4r3d100000006Archaeology Data Serviceeng[{'additionalName': 'ADS', 'additionalNameLang...https://archaeologydataservice.ac.uk/[FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg][\"help@archaeologydataservice.ac.uk\", \"https:/...The ADS is an accredited digital repository fo...eng[disciplinary]{\"size\": \"1837 results\", \"updatedp\": \"2020-05-...1996-10-01NaN[\"eng\"][{'name': '1 Humanities and Social Sciences', ...https://archaeologydataservice.ac.uk/about/our...[{'name': 'Archived data', 'scheme': 'parse'},...[dataProvider, serviceProvider][FAIR, archaeology, cultural heritage, prehist...[{'institutionName': 'Arts and Humanities Rese...[{\"policyName\": \"ADS Guides to good practice\",...{\"databaseAccessType\": \"open\", \"databaseAcces...[{\"databaseLicenseName\": \"CC\", \"databaseLicens...[{\"dataAccessType\": \"open\", \"dataAccessRestric...[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...restricted[{\"dataUploadLicenseName\": \"Guidelines for Dep...[\"other\"]yes{\"api\": \"https://archaeologydataservice.ac.uk/...[\"DOI\"]https://archaeologydataservice.ac.uk/advice/te...[]unknownyes[\"other\"][{\"metadataStandardName\": \"DataCite Metadata S...{\"syndication\": \"https://archaeologydataservic...ADS is covered by Clarivate Data Citation Inde...2012-07-232021-09-02
\n", "
" ], "text/plain": [ " orgIdentifier repositoryName repositoryName.language \\\n", "0 r3d100000001 Odum Institute Archive Dataverse eng \n", "1 r3d100000002 Access to Archival Databases eng \n", "2 r3d100000004 Datenbank Gesprochenes Deutsch deu \n", "3 r3d100000005 UNC Dataverse eng \n", "4 r3d100000006 Archaeology Data Service eng \n", "\n", " additionalName \\\n", "0 [] \n", "1 [{'additionalName': 'AAD', 'additionalNameLang... \n", "2 [{'additionalName': 'DGD', 'additionalNameLang... \n", "3 [{'additionalName': 'University of North Carol... \n", "4 [{'additionalName': 'ADS', 'additionalNameLang... \n", "\n", " repositoryURL \\\n", "0 https://dataverse.unc.edu/dataverse/odum \n", "1 https://aad.archives.gov/aad/ \n", "2 https://dgd.ids-mannheim.de/ \n", "3 https://dataverse.unc.edu/ \n", "4 https://archaeologydataservice.ac.uk/ \n", "\n", " repositoryIdentifier \\\n", "0 [] \n", "1 [RRID:SCR_010479, RRID:nlx_157752] \n", "2 [] \n", "3 [] \n", "4 [FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] \n", "\n", " repositoryContact \\\n", "0 [\"https://dataverse.unc.edu/dataverse/odum#\", ... \n", "1 [\"https://www.archives.gov/contact\"] \n", "2 [\"dgd@ids-mannheim.de\"] \n", "3 [\"https://dataverse.unc.edu/\", \"odumarchive@un... \n", "4 [\"help@archaeologydataservice.ac.uk\", \"https:/... \n", "\n", " description description.language \\\n", "0 The Odum Institute Archive Dataverse contains ... eng \n", "1 You will find in the Access to Archival Databa... eng \n", "2 The \"Database for Spoken German (DGD)\" is a co... eng \n", "3 UNC Dataverse is an open-source repository sof... eng \n", "4 The ADS is an accredited digital repository fo... eng \n", "\n", " type size \\\n", "0 [disciplinary] {\"size\": \"13 dataverses; 3.050 datasets\", \"upd... \n", "1 [disciplinary] {\"size\": \"\", \"updatedp\": \"\"} \n", "2 [disciplinary] {\"size\": \"34 corpora\", \"updatedp\": \"2020-02-03\"} \n", "3 [institutional] {\"size\": \"186 dataverses; 25.272 studies; 229.... 
\n", "4 [disciplinary] {\"size\": \"1837 results\", \"updatedp\": \"2020-05-... \n", "\n", " startDate endDate repositoryLanguage \\\n", "0 NaN NaN [\"eng\"] \n", "1 1985 NaN [\"eng\", \"spa\"] \n", "2 2012 NaN [\"deu\"] \n", "3 2011 NaN [\"eng\"] \n", "4 1996-10-01 NaN [\"eng\"] \n", "\n", " subject \\\n", "0 [{'name': '1 Humanities and Social Sciences', ... \n", "1 [{'name': '1 Humanities and Social Sciences', ... \n", "2 [{'name': '1 Humanities and Social Sciences', ... \n", "3 [{'name': '1 Humanities and Social Sciences', ... \n", "4 [{'name': '1 Humanities and Social Sciences', ... \n", "\n", " missionStatementURL \\\n", "0 NaN \n", "1 https://www.archives.gov/publications/general-... \n", "2 https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext... \n", "3 https://odum.unc.edu/about/mission-vision/ \n", "4 https://archaeologydataservice.ac.uk/about/our... \n", "\n", " contentType \\\n", "0 [{'name': 'Databases', 'scheme': 'parse'}, {'n... \n", "1 [{'name': 'Images', 'scheme': 'parse'}, {'name... \n", "2 [{'name': 'Audiovisual data', 'scheme': 'parse... \n", "3 [{'name': 'Archived data', 'scheme': 'parse'},... \n", "4 [{'name': 'Archived data', 'scheme': 'parse'},... \n", "\n", " providerType \\\n", "0 [dataProvider] \n", "1 [dataProvider] \n", "2 [dataProvider, serviceProvider] \n", "3 [dataProvider, serviceProvider] \n", "4 [dataProvider, serviceProvider] \n", "\n", " keyword \\\n", "0 [FAIR, Middle East, crime, demography, economy... \n", "1 [US History] \n", "2 [Australian German, FOLK, German dialects, Pfe... \n", "3 [FAIR, census, demographic survey, demography,... \n", "4 [FAIR, archaeology, cultural heritage, prehist... \n", "\n", " institution \\\n", "0 [{'institutionName': 'Odum Institute for Resea... \n", "1 [{'institutionName': 'The U.S. National Archiv... \n", "2 [{'institutionName': 'Institut für Deutsche Sp... \n", "3 [{'institutionName': 'Odum Institute for Resea... \n", "4 [{'institutionName': 'Arts and Humanities Rese... 
\n", "\n", " policy \\\n", "0 [{\"policyName\": \"Collection Development Policy... \n", "1 [{\"policyName\": \"Contribution Policy\", \"policy... \n", "2 [{\"policyName\": \"Erfurter Aufruf zur Sicherung... \n", "3 [{\"policyName\": \"Collection Development Policy... \n", "4 [{\"policyName\": \"ADS Guides to good practice\",... \n", "\n", " databaseAccess \\\n", "0 {\"databaseAccessType\": \"open\", \"databaseAcces... \n", "1 {\"databaseAccessType\": \"open\", \"databaseAcces... \n", "2 {\"databaseAccessType\": \"restricted\", \"databas... \n", "3 {\"databaseAccessType\": \"open\", \"databaseAcces... \n", "4 {\"databaseAccessType\": \"open\", \"databaseAcces... \n", "\n", " databaseLicense \\\n", "0 [{\"databaseLicenseName\": \"CC0\", \"databaseLicen... \n", "1 [] \n", "2 [] \n", "3 [] \n", "4 [{\"databaseLicenseName\": \"CC\", \"databaseLicens... \n", "\n", " dataAccess \\\n", "0 [{\"dataAccessType\": \"embargoed\", \"dataAccessRe... \n", "1 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n", "2 [{\"dataAccessType\": \"restricted\", \"dataAccessR... \n", "3 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n", "4 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n", "\n", " dataLicense dataUploadType \\\n", "0 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n", "1 [{\"dataLicenseName\": \"Copyrights\", \"dataLicens... restricted \n", "2 [{\"dataLicenseName\": \"other\", \"dataLicenseURL\"... restricted \n", "3 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n", "4 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n", "\n", " dataUploadLicense software \\\n", "0 [] [\"DataVerse\"] \n", "1 [] [\"unknown\"] \n", "2 [] [\"other\"] \n", "3 [{\"dataUploadLicenseName\": \"Data Deposit Form\"... [\"DataVerse\"] \n", "4 [{\"dataUploadLicenseName\": \"Guidelines for Dep... [\"other\"] \n", "\n", " versioning api \\\n", "0 NaN {} \n", "1 no {\"api\": \"https://www.archives.gov/developer#to... 
\n", "2 yes {} \n", "3 yes {\"api\": \"https://guides.dataverse.org/en/lates... \n", "4 yes {\"api\": \"https://archaeologydataservice.ac.uk/... \n", "\n", " pidSystem \\\n", "0 [\"DOI\"] \n", "1 [\"none\"] \n", "2 [\"none\"] \n", "3 [\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"] \n", "4 [\"DOI\"] \n", "\n", " citationGuidelineURL aidSystem \\\n", "0 NaN [] \n", "1 https://aad.archives.gov/aad/help/getting-star... [] \n", "2 http://agd.ids-mannheim.de/konditionen.shtml [] \n", "3 https://dataverse.org/best-practices/data-cita... [] \n", "4 https://archaeologydataservice.ac.uk/advice/te... [] \n", "\n", " enhancedPublication qualityManagement certificate \\\n", "0 unknown yes [\"other\"] \n", "1 unknown unknown [] \n", "2 unknown unknown [\"RatSWD\"] \n", "3 unknown yes [] \n", "4 unknown yes [\"other\"] \n", "\n", " metadataStandard \\\n", "0 [{\"metadataStandardName\": \"DDI - Data Document... \n", "1 [] \n", "2 [] \n", "3 [{\"metadataStandardName\": \"DDI - Data Document... \n", "4 [{\"metadataStandardName\": \"DataCite Metadata S... \n", "\n", " syndication \\\n", "0 {} \n", "1 {\"syndication\": \"http://www.archives.gov/socia... \n", "2 {} \n", "3 {} \n", "4 {\"syndication\": \"https://archaeologydataservic... \n", "\n", " remarks entryDate lastUpdate \n", "0 Odum Dataverse is covered by Thomson Reuters D... 2013-06-10 2021-07-06 \n", "1 NaN 2012-07-04 2021-05-25 \n", "2 NaN 2012-07-20 2020-08-27 \n", "3 UNC Dataverse is covered by Clarivate Data Cit... 2012-07-23 2021-08-11 \n", "4 ADS is covered by Clarivate Data Citation Inde... 
2012-07-23 2021-09-02 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "re3data_df = pd.read_csv(\n",
 "    '../data/raw/re3data.tsv',\n",
 "    delimiter='\\t',\n",
 "    # Each listed column is passed through ast.literal_eval while reading.\n",
 "    converters=dict.fromkeys(\n",
 "        [\n",
 "            'subject',\n",
 "            'keyword',\n",
 "            'additionalName',\n",
 "            'repositoryIdentifier',\n",
 "            'type',\n",
 "            'contentType',\n",
 "            'providerType',\n",
 "            'institution',\n",
 "        ],\n",
 "        ast.literal_eval,\n",
 "    ),\n",
 ")\n",
 "\n",
 "re3data_df.head()"
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orgIdentifierrepositoryNamerepositoryName.languageadditionalNamerepositoryURLrepositoryIdentifierrepositoryContactdescriptiondescription.languagetypesizestartDateendDaterepositoryLanguagesubjectmissionStatementURLcontentTypeproviderTypekeywordinstitutionpolicydatabaseAccessdatabaseLicensedataAccessdataLicensedataUploadTypedataUploadLicensesoftwareversioningapipidSystemcitationGuidelineURLaidSystemenhancedPublicationqualityManagementcertificatemetadataStandardsyndicationremarksentryDatelastUpdate
count2739273927392739271627392739273927392739273917761572739273923182739273927392739273927392739273927392711273927391316273927391512273927372739273927392739167427392739
unique273927361921622713864245927376912893528010713892249133852504272023191237514522633681232114629132112331417256316561275740
topr3d100000001Språkbankeneng[]http://icgem.gfz-potsdam.de/home[][]The National Archives and Records Administrati...eng[disciplinary]{\"size\": \"\", \"updatedp\": \"\"}20082015[\"eng\"][{'name': '1 Humanities and Social Sciences', ...https://learn.scholarsportal.info/all-guides/d...[{'name': 'Standard office documents', 'scheme...[dataProvider][multidisciplinary][{'institutionName': 'National Center for Biot...[][]{\"databaseAccessType\": \"open\", \"databaseAcces...[][{\"dataAccessType\": \"open\", \"dataAccessRestric...[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...restricted[][\"unknown\"]yes{}[\"none\"]https://dataverse.org/best-practices/data-cita...[]unknownyes[][]{}is covered by Elsevier.2016-05-102021-09-03
freq1225545692187620222723173314509211206322614301771193631225712159126964179320131226110814981361722155160815152509166921621420137
\n", "
" ], "text/plain": [ " orgIdentifier repositoryName repositoryName.language additionalName \\\n", "count 2739 2739 2739 2739 \n", "unique 2739 2736 19 2162 \n", "top r3d100000001 Språkbanken eng [] \n", "freq 1 2 2554 569 \n", "\n", " repositoryURL repositoryIdentifier \\\n", "count 2716 2739 \n", "unique 2713 864 \n", "top http://icgem.gfz-potsdam.de/home [] \n", "freq 2 1876 \n", "\n", " repositoryContact description \\\n", "count 2739 2739 \n", "unique 2459 2737 \n", "top [] The National Archives and Records Administrati... \n", "freq 202 2 \n", "\n", " description.language type size \\\n", "count 2739 2739 2739 \n", "unique 6 9 1289 \n", "top eng [disciplinary] {\"size\": \"\", \"updatedp\": \"\"} \n", "freq 2723 1733 1450 \n", "\n", " startDate endDate repositoryLanguage \\\n", "count 1776 157 2739 \n", "unique 352 80 107 \n", "top 2008 2015 [\"eng\"] \n", "freq 92 11 2063 \n", "\n", " subject \\\n", "count 2739 \n", "unique 1389 \n", "top [{'name': '1 Humanities and Social Sciences', ... \n", "freq 226 \n", "\n", " missionStatementURL \\\n", "count 2318 \n", "unique 2249 \n", "top https://learn.scholarsportal.info/all-guides/d... \n", "freq 14 \n", "\n", " contentType providerType \\\n", "count 2739 2739 \n", "unique 1338 5 \n", "top [{'name': 'Standard office documents', 'scheme... [dataProvider] \n", "freq 30 1771 \n", "\n", " keyword \\\n", "count 2739 \n", "unique 2504 \n", "top [multidisciplinary] \n", "freq 193 \n", "\n", " institution policy \\\n", "count 2739 2739 \n", "unique 2720 2319 \n", "top [{'institutionName': 'National Center for Biot... [][] \n", "freq 6 312 \n", "\n", " databaseAccess databaseLicense \\\n", "count 2739 2739 \n", "unique 12 375 \n", "top {\"databaseAccessType\": \"open\", \"databaseAcces... [] \n", "freq 2571 2159 \n", "\n", " dataAccess \\\n", "count 2739 \n", "unique 145 \n", "top [{\"dataAccessType\": \"open\", \"dataAccessRestric... 
\n", "freq 1269 \n", "\n", " dataLicense dataUploadType \\\n", "count 2739 2711 \n", "unique 2263 3 \n", "top [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n", "freq 64 1793 \n", "\n", " dataUploadLicense software versioning api pidSystem \\\n", "count 2739 2739 1316 2739 2739 \n", "unique 681 23 2 1146 29 \n", "top [] [\"unknown\"] yes {} [\"none\"] \n", "freq 2013 1226 1108 1498 1361 \n", "\n", " citationGuidelineURL aidSystem \\\n", "count 1512 2739 \n", "unique 1321 12 \n", "top https://dataverse.org/best-practices/data-cita... [] \n", "freq 72 2155 \n", "\n", " enhancedPublication qualityManagement certificate metadataStandard \\\n", "count 2737 2739 2739 2739 \n", "unique 3 3 14 172 \n", "top unknown yes [] [] \n", "freq 1608 1515 2509 1669 \n", "\n", " syndication remarks entryDate lastUpdate \n", "count 2739 1674 2739 2739 \n", "unique 563 1656 1275 740 \n", "top {} is covered by Elsevier. 2016-05-10 2021-09-03 \n", "freq 2162 14 20 137 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summarize every column (include='all'), not only the numeric ones.\n", "re3data_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**openDOAR**" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
system_metadata.idrepository_metadata.namerepository_metadata.alternativenamerepository_metadata.urlrepository_metadata.descriptionrepository_metadata.typerepository_metadata.content_languagessystem_metadata.date_modifiedsystem_metadata.date_createdrepository_metadata.content_subjectsrepository_metadata.content_typesorganizationpolicy_urlsrepository_metadata.softwarerepository_metadata.oai_urlsystem_metadata.publicly_visiblerepository_metadata.repository_statusrepository_metadata.fulltext_record_countrepository_metadata.metadata_record_count
0175{\"name\": \"hku theses online\", \"language\": \"en\"}[]http://hub.hku.hk/handle/10722/1057this is an institutional repository providing ...institutional[\"zh\", \"en\"]2021-03-25 10:16:182005-12-21 12:44:08[multidisciplinary][bibliographic_references, theses_and_disserta...[{'name': 'university of hong kong', 'alternat...[]{\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap...NaNyesfully_functionalNaN11850.0
164{\"name\": \"research support scheme - central eu...[]http://rss.archives.ceu.hu/this is an institutional repository collecting...institutional[\"cs\", \"en\", \"hu\", \"ru\"]2021-03-25 09:48:312006-01-04 14:59:30[multidisciplinary][unpub_reports_and_working_papers][{'name': 'central european university', 'alte...[]{\"name\": \"eprints\", \"version\": \"2.2.1\"}http://rss.archives.ceu.hu/perl/oai2yesfully_functionalNaN164.0
2151{\"name\": \"cadmus, eui research repository\", \"l...[]http://cadmus.eui.eu/cadmus is the name of the eui research reposit...institutional[\"nl\", \"en\", \"fr\", \"de\", \"it\"]2021-09-13 13:35:362006-01-04 12:07:07[history and archaeology, multidisciplinary, s...[journal_articles, theses_and_dissertations, u...[{'name': 'european university institute', 'al...[{\"policy_url\": \"https://www.eui.eu/research/e...{\"name\": \"dspace\", \"version\": \"5.2\"}http://cadmus.eui.eu/oai/requestyesfully_functional3867.024869.0
3105{\"name\": \"document server@uhasselt\", \"language...[]https://doclib.uhasselt.be/dspace/this site is a university repository providing...institutional[\"nl\", \"en\", \"fr\", \"de\"]2021-04-16 15:23:522006-01-24 15:46:44[multidisciplinary][journal_articles, conference_and_workshop_pap...[{'name': 'uhasselt', 'alternativeName': 'hass...[]{\"name\": \"dspace\", \"version\": \"1.7.2\"}http://doclib.uhasselt.be/dspace-oai/requestyesfully_functional0.027376.0
4101{\"name\": \"utrecht university repository\", \"lan...[]http://dspace.library.uu.nlthis site is a university repository providing...institutional[\"nl\", \"en\"]2021-04-16 15:22:032006-01-13 12:55:13[multidisciplinary][journal_articles, conference_and_workshop_pap...[{'name': 'university of utrecht', 'alternativ...[]{\"name\": \"dspace\", \"version\": \"\"}https://dspace.library.uu.nl/oai/requestyesfully_functional1686.0185637.0
\n", "
" ], "text/plain": [ " system_metadata.id repository_metadata.name \\\n", "0 175 {\"name\": \"hku theses online\", \"language\": \"en\"} \n", "1 64 {\"name\": \"research support scheme - central eu... \n", "2 151 {\"name\": \"cadmus, eui research repository\", \"l... \n", "3 105 {\"name\": \"document server@uhasselt\", \"language... \n", "4 101 {\"name\": \"utrecht university repository\", \"lan... \n", "\n", " repository_metadata.alternativename repository_metadata.url \\\n", "0 [] http://hub.hku.hk/handle/10722/1057 \n", "1 [] http://rss.archives.ceu.hu/ \n", "2 [] http://cadmus.eui.eu/ \n", "3 [] https://doclib.uhasselt.be/dspace/ \n", "4 [] http://dspace.library.uu.nl \n", "\n", " repository_metadata.description repository_metadata.type \\\n", "0 this is an institutional repository providing ... institutional \n", "1 this is an institutional repository collecting... institutional \n", "2 cadmus is the name of the eui research reposit... institutional \n", "3 this site is a university repository providing... institutional \n", "4 this site is a university repository providing... institutional \n", "\n", " repository_metadata.content_languages system_metadata.date_modified \\\n", "0 [\"zh\", \"en\"] 2021-03-25 10:16:18 \n", "1 [\"cs\", \"en\", \"hu\", \"ru\"] 2021-03-25 09:48:31 \n", "2 [\"nl\", \"en\", \"fr\", \"de\", \"it\"] 2021-09-13 13:35:36 \n", "3 [\"nl\", \"en\", \"fr\", \"de\"] 2021-04-16 15:23:52 \n", "4 [\"nl\", \"en\"] 2021-04-16 15:22:03 \n", "\n", " system_metadata.date_created \\\n", "0 2005-12-21 12:44:08 \n", "1 2006-01-04 14:59:30 \n", "2 2006-01-04 12:07:07 \n", "3 2006-01-24 15:46:44 \n", "4 2006-01-13 12:55:13 \n", "\n", " repository_metadata.content_subjects \\\n", "0 [multidisciplinary] \n", "1 [multidisciplinary] \n", "2 [history and archaeology, multidisciplinary, s... \n", "3 [multidisciplinary] \n", "4 [multidisciplinary] \n", "\n", " repository_metadata.content_types \\\n", "0 [bibliographic_references, theses_and_disserta... 
\n", "1 [unpub_reports_and_working_papers] \n", "2 [journal_articles, theses_and_dissertations, u... \n", "3 [journal_articles, conference_and_workshop_pap... \n", "4 [journal_articles, conference_and_workshop_pap... \n", "\n", " organization \\\n", "0 [{'name': 'university of hong kong', 'alternat... \n", "1 [{'name': 'central european university', 'alte... \n", "2 [{'name': 'european university institute', 'al... \n", "3 [{'name': 'uhasselt', 'alternativeName': 'hass... \n", "4 [{'name': 'university of utrecht', 'alternativ... \n", "\n", " policy_urls \\\n", "0 [] \n", "1 [] \n", "2 [{\"policy_url\": \"https://www.eui.eu/research/e... \n", "3 [] \n", "4 [] \n", "\n", " repository_metadata.software \\\n", "0 {\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap... \n", "1 {\"name\": \"eprints\", \"version\": \"2.2.1\"} \n", "2 {\"name\": \"dspace\", \"version\": \"5.2\"} \n", "3 {\"name\": \"dspace\", \"version\": \"1.7.2\"} \n", "4 {\"name\": \"dspace\", \"version\": \"\"} \n", "\n", " repository_metadata.oai_url \\\n", "0 NaN \n", "1 http://rss.archives.ceu.hu/perl/oai2 \n", "2 http://cadmus.eui.eu/oai/request \n", "3 http://doclib.uhasselt.be/dspace-oai/request \n", "4 https://dspace.library.uu.nl/oai/request \n", "\n", " system_metadata.publicly_visible repository_metadata.repository_status \\\n", "0 yes fully_functional \n", "1 yes fully_functional \n", "2 yes fully_functional \n", "3 yes fully_functional \n", "4 yes fully_functional \n", "\n", " repository_metadata.fulltext_record_count \\\n", "0 NaN \n", "1 NaN \n", "2 3867.0 \n", "3 0.0 \n", "4 1686.0 \n", "\n", " repository_metadata.metadata_record_count \n", "0 11850.0 \n", "1 164.0 \n", "2 24869.0 \n", "3 27376.0 \n", "4 185637.0 " ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "opendoar_df = pd.read_csv(\n",
 "    '../data/raw/openDoar.tsv',\n",
 "    delimiter='\\t',\n",
 "    # Each listed column is passed through ast.literal_eval while reading.\n",
 "    converters=dict.fromkeys(\n",
 "        [\n",
 "            'repository_metadata.content_subjects',\n",
 "            'repository_metadata.alternativename',\n",
 "            'repository_metadata.content_types',\n",
 "            'organization',\n",
 "        ],\n",
 "        ast.literal_eval,\n",
 "    ),\n",
 "    # Read the repository id as a string instead of letting pandas infer a dtype.\n",
 "    dtype={'system_metadata.id': str},\n",
 ")\n",
 "\n",
 "opendoar_df.head()"
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
system_metadata.idrepository_metadata.namerepository_metadata.alternativenamerepository_metadata.urlrepository_metadata.descriptionrepository_metadata.typerepository_metadata.content_languagessystem_metadata.date_modifiedsystem_metadata.date_createdrepository_metadata.content_subjectsrepository_metadata.content_typesorganizationpolicy_urlsrepository_metadata.softwarerepository_metadata.oai_urlsystem_metadata.publicly_visiblerepository_metadata.repository_statusrepository_metadata.fulltext_record_countrepository_metadata.metadata_record_count
count574257425742574254215742574257425742574257425742574257424402574255952.299000e+034.197000e+03
unique574257132108570546194330237255738214785201642321437017NaNNaN
top175{\"name\": \"hiroshima associated repository port...[]http://harp.lib.hiroshima-u.ac.jp/this site provides access to the research outp...institutional[\"en\"]2020-09-18 12:53:482020-09-18 12:53:48[\"multidisciplinary\"][theses_and_dissertations][{'name': 'rijksuniversiteit groningen', 'alte...[]{\"name\": \"dspace\", \"version\": \"\"}https://kidoks.bsz-bw.de/oaiyesfully_functionalNaNNaN
freq1335953955096191782823227465265098822357425276NaNNaN
meanNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN5.010186e+031.760546e+05
stdNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN4.206295e+046.600825e+06
minNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.000000e+000.000000e+00
25%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.000000e+008.950000e+02
50%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN4.220000e+024.026000e+03
75%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN2.930500e+031.630400e+04
maxNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN1.817531e+064.200000e+08
\n", "
" ], "text/plain": [ " system_metadata.id repository_metadata.name \\\n", "count 5742 5742 \n", "unique 5742 5713 \n", "top 175 {\"name\": \"hiroshima associated repository port... \n", "freq 1 3 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " repository_metadata.alternativename \\\n", "count 5742 \n", "unique 2108 \n", "top [] \n", "freq 3595 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " repository_metadata.url \\\n", "count 5742 \n", "unique 5705 \n", "top http://harp.lib.hiroshima-u.ac.jp/ \n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " repository_metadata.description \\\n", "count 5421 \n", "unique 4619 \n", "top this site provides access to the research outp... \n", "freq 95 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " repository_metadata.type repository_metadata.content_languages \\\n", "count 5742 5742 \n", "unique 4 330 \n", "top institutional [\"en\"] \n", "freq 5096 1917 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " system_metadata.date_modified system_metadata.date_created \\\n", "count 5742 5742 \n", "unique 2372 5573 \n", "top 2020-09-18 12:53:48 2020-09-18 12:53:48 \n", "freq 82 82 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " repository_metadata.content_subjects repository_metadata.content_types \\\n", "count 5742 5742 \n", "unique 821 478 \n", "top [\"multidisciplinary\"] [theses_and_dissertations] \n", "freq 3227 465 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " organization 
policy_urls \\\n", "count 5742 5742 \n", "unique 5201 642 \n", "top [{'name': 'rijksuniversiteit groningen', 'alte... [] \n", "freq 26 5098 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " repository_metadata.software repository_metadata.oai_url \\\n", "count 5742 4402 \n", "unique 321 4370 \n", "top {\"name\": \"dspace\", \"version\": \"\"} https://kidoks.bsz-bw.de/oai \n", "freq 822 3 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " system_metadata.publicly_visible repository_metadata.repository_status \\\n", "count 5742 5595 \n", "unique 1 7 \n", "top yes fully_functional \n", "freq 5742 5276 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " repository_metadata.fulltext_record_count \\\n", "count 2.299000e+03 \n", "unique NaN \n", "top NaN \n", "freq NaN \n", "mean 5.010186e+03 \n", "std 4.206295e+04 \n", "min 0.000000e+00 \n", "25% 0.000000e+00 \n", "50% 4.220000e+02 \n", "75% 2.930500e+03 \n", "max 1.817531e+06 \n", "\n", " repository_metadata.metadata_record_count \n", "count 4.197000e+03 \n", "unique NaN \n", "top NaN \n", "freq NaN \n", "mean 1.760546e+05 \n", "std 6.600825e+06 \n", "min 0.000000e+00 \n", "25% 8.950000e+02 \n", "50% 4.026000e+03 \n", "75% 1.630400e+04 \n", "max 4.200000e+08 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**ROAR**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
01633archive1NaNNaNdisk0/00/00/00/012010-01-06 13:43:482011-07-18 05:40:072010-01-06 13:43:48subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://archivesic.ccsd.cnrs.fr/@RCHIVESIChttp://archivesic.ccsd.cnrs.fr/oai/oai.phpNaNNaNNaNNaNNaNNaNNaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN2002-05-17 19:24:41NaNNaN000250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...NaNNaNNaNNaN[opendoar, celestial][58, 669]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
110511archive1NaNNaNdisk0/00/00/00/102010-01-06 13:43:482011-07-18 05:40:132010-01-06 13:43:48institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://www.diva-portal.org/mdh/Academic Archive On-line (Mälardalen Universit...http://www.diva-portal.org/oai/mdh/OAINaNNaNNaNNaNTRUETRUENaNNaNNaNseUppsala59.866717.6333divageoname_2_SEotherNaN2005-12-08 13:15:22NaNNaN0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...NaNNaNNaNNaN[opendoar, celestial][258, 526]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
21000274archive1NaNNaNdisk0/00/00/10/002010-01-06 13:45:012011-07-06 08:21:212010-01-06 13:45:01subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://pam.pisharp.org/PAM - Portuguese Archive of MathematicsNaNNaNNaNNaNNaNTRUETRUENaNNaNNaNptBellevue, WA47.6034-122.155dspacegeoname_2_PTotherNaN2006-05-04 10:48:14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
31000120archive91NaNNaNdisk0/00/01/00/012015-08-08 14:52:112016-03-21 19:44:012015-08-08 14:52:11subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://edoc.sub.uni-hamburg.de/klimawandel/Klimawandel Dokumentenserverhttp://edoc.sub.uni-hamburg.de/klimawandel/oaiNaNNaNNaNThe \"Documentenserver Klimawandel\" (Repository...TRUETRUETRUE[KLIMZUG projects, Climate Service Center 2.0,...[http://www.climateservicecenter.de/, http://w...deHamburg53.55119.9937opusgeoname_2_DEother[GF, GE, HD, G1, S1]2015-07-02 08:08:31NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN[opendoar, celestial][3408, 5881]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41000811archive404NaNNaNdisk0/00/01/00/082015-08-08 14:52:262016-03-21 19:43:512015-08-08 14:52:26institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://creativematter.skidmore.edu/Creative Matter | Skidmore College Researchhttp://creativematter.skidmore.edu/do/oai/NaNhttp://creativematter.skidmore.edu/recent.rssNaNWelcome to Creative Matter, a repository for t...TRUEFALSEFALSESkidmore Collegehttp://www.skidmore.edu/usSaratoga Springs43.0961-73.7818bepressgeoname_2_USotherNaN2015-07-06 17:35:50NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial5882NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "0 1 633 archive 1 NaN NaN \n", "1 10 511 archive 1 NaN NaN \n", "2 1000 274 archive 1 NaN NaN \n", "3 10001 20 archive 91 NaN NaN \n", "4 10008 11 archive 404 NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "0 disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-18 05:40:07 \n", "1 disk0/00/00/00/10 2010-01-06 13:43:48 2011-07-18 05:40:13 \n", "2 disk0/00/00/10/00 2010-01-06 13:45:01 2011-07-06 08:21:21 \n", "3 disk0/00/01/00/01 2015-08-08 14:52:11 2016-03-21 19:44:01 \n", "4 disk0/00/01/00/08 2015-08-08 14:52:26 2016-03-21 19:43:51 \n", "\n", " status_changed type succeeds commentary \\\n", "0 2010-01-06 13:43:48 subject NaN NaN \n", "1 2010-01-06 13:43:48 institutional NaN NaN \n", "2 2010-01-06 13:45:01 subject NaN NaN \n", "3 2015-08-08 14:52:11 subject NaN NaN \n", "4 2015-08-08 14:52:26 institutional NaN NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "0 show NaN NaN NaN NaN \n", "1 show NaN NaN NaN NaN \n", "2 show NaN NaN NaN NaN \n", "3 show NaN NaN NaN NaN \n", "4 show NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type item_issues_description \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "0 NaN NaN 0 \n", "1 NaN NaN 0 \n", "2 NaN NaN 0 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " home_page \\\n", "0 http://archivesic.ccsd.cnrs.fr/ \n", "1 http://www.diva-portal.org/mdh/ \n", "2 http://pam.pisharp.org/ \n", "3 http://edoc.sub.uni-hamburg.de/klimawandel/ 
\n", "4 http://creativematter.skidmore.edu/ \n", "\n", " title \\\n", "0 @RCHIVESIC \n", "1 Academic Archive On-line (Mälardalen Universit... \n", "2 PAM - Portuguese Archive of Mathematics \n", "3 Klimawandel Dokumentenserver \n", "4 Creative Matter | Skidmore College Research \n", "\n", " oai_pmh sword_endpoint \\\n", "0 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n", "1 http://www.diva-portal.org/oai/mdh/OAI NaN \n", "2 NaN NaN \n", "3 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n", "4 http://creativematter.skidmore.edu/do/oai/ NaN \n", "\n", " rss_feed twitter_feed \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 http://creativematter.skidmore.edu/recent.rss NaN \n", "\n", " description fulltext open_access \\\n", "0 NaN NaN NaN \n", "1 NaN TRUE TRUE \n", "2 NaN TRUE TRUE \n", "3 The \"Documentenserver Klimawandel\" (Repository... TRUE TRUE \n", "4 Welcome to Creative Matter, a repository for t... TRUE FALSE \n", "\n", " mandate organisation_title \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 TRUE [KLIMZUG projects, Climate Service Center 2.0,... \n", "4 FALSE Skidmore College \n", "\n", " organisation_home_page location_country \\\n", "0 NaN fr \n", "1 NaN se \n", "2 NaN pt \n", "3 [http://www.climateservicecenter.de/, http://w... 
de \n", "4 http://www.skidmore.edu/ us \n", "\n", " location_city location_latitude location_longitude software \\\n", "0 NaN NaN NaN hal \n", "1 Uppsala 59.8667 17.6333 diva \n", "2 Bellevue, WA 47.6034 -122.155 dspace \n", "3 Hamburg 53.5511 9.9937 opus \n", "4 Saratoga Springs 43.0961 -73.7818 bepress \n", "\n", " geoname version subjects date note \\\n", "0 geoname_2_FR other NaN 2002-05-17 19:24:41 NaN \n", "1 geoname_2_SE other NaN 2005-12-08 13:15:22 NaN \n", "2 geoname_2_PT other NaN 2006-05-04 10:48:14 NaN \n", "3 geoname_2_DE other [GF, GE, HD, G1, S1] 2015-07-02 08:08:31 NaN \n", "4 geoname_2_US other NaN 2015-07-06 17:35:50 NaN \n", "\n", " suggestions activity_low activity_medium activity_high recordcount \\\n", "0 NaN 0 0 0 25 \n", "1 NaN 0 0 0 100 \n", "2 NaN NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... NaN \n", "1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... 
NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n", "0 NaN NaN NaN [opendoar, celestial] \n", "1 NaN NaN NaN [opendoar, celestial] \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN [opendoar, celestial] \n", "4 NaN NaN NaN celestial \n", "\n", " registry_id submit_to submitted_to_name submitted_to_done \\\n", "0 [58, 669] NaN NaN NaN \n", "1 [258, 526] NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 [3408, 5881] NaN NaN NaN \n", "4 5882 NaN NaN NaN \n", "\n", " webometrics_rank webometrics_size webometrics_visibility \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " webometrics_rich_files webometrics_scholar monthly_deposits total_deposits \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " association \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n", "roar_df = roar_df.groupby('eprintid').aggregate(set)\n", "\n", "def value_or_list(cell_set):\n", " copy = set(cell_set)\n", " copy.discard(np.nan) \n", " if len(copy) == 0:\n", " return np.nan\n", " if len(copy) == 1:\n", " return copy.pop()\n", " return list(copy)\n", " \n", "roar_df = roar_df.applymap(value_or_list)\n", "roar_df.reset_index(inplace=True)\n", "\n", "roar_df.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
count53755375537553750.00.0537553755375537553751070.053750.00.00.00.063636363630.00.00.022450.00.026553685373426717615211153782412741273676439642265080365536813664463746715375125053602151872291229122912293229127025827025846034578293205205148148148148148756756217
unique537565812135NaNNaN537541273966415812107NaN2NaNNaNNaNNaN4856243NaNNaNNaN4NaNNaN25202507639941701468111330422238023771143186128872917311265390648302071717254167411704135118134117942567111481481481461433463423
top111archive1NaNNaNdisk0/00/00/00/012010-01-06 13:43:482011-07-06 08:24:532010-01-06 13:43:48institutional10164NaNshowNaNNaNNaNNaNbad_oai_pmh_url_0duplicate_titleDuplicate title to <xhtml:table xmlns:xhtml=\"h...2010-01-13 10:44:49discoveredNaNNaNNaN0NaNNaNFALSEhttp://eprints.upnjatim.ac.id/Repositorio Institucionalhttp://kce.docressources.info/ws/PMBWs_2http://producao.usp.br/sword/servicedocumenthttp://eprints.upnjatim.ac.id/cgi/latest_tool?...http://my.indexcopernicus.com/fredemorenoinfo:other:archives.eprints.org:importTRUETRUEFALSEChinese Academy of Science (中国科学院)http://www.cas.cn/usLima34.1607-118.139dspacegeoname_2_USotherK12006-05-04 10:48:14DSpace@Işık is a growing collection of Işık Un...This repository is hosted by the Texas Digital...0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0000[opendoar, celestial]2479[opendoar, celestial, roarmap]opendoar2021-01-252446208248060,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0russell_group
freq133253751333NaNNaN11681637951NaN5334NaNNaNNaNNaN153324538NaNNaNNaN2204NaNNaN258474252112275826522699998866925252307840477153992920152077221373395113114113114177549220520511135387387127
meanNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
stdNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
minNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
25%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
50%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
75%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
maxNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "count 5375 5375 5375 5375 0.0 0.0 \n", "unique 5375 658 1 2135 NaN NaN \n", "top 1 11 archive 1 NaN NaN \n", "freq 1 332 5375 1333 NaN NaN \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "count 5375 5375 5375 \n", "unique 5375 4127 3966 \n", "top disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-06 08:24:53 \n", "freq 1 16 8 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " status_changed type succeeds commentary \\\n", "count 5375 5375 107 0.0 \n", "unique 4158 12 107 NaN \n", "top 2010-01-06 13:43:48 institutional 10164 NaN \n", "freq 16 3795 1 NaN \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "count 5375 0.0 0.0 0.0 0.0 \n", "unique 2 NaN NaN NaN NaN \n", "top show NaN NaN NaN NaN \n", "freq 5334 NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type \\\n", "count 63 63 \n", "unique 48 5 \n", "top bad_oai_pmh_url_0 duplicate_title \n", "freq 15 33 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " item_issues_description \\\n", "count 63 \n", "unique 62 \n", "top Duplicate title to \n", "\n", "\n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtypeattributes.created-atattributes.updated-atattributes.metadata.doiattributes.metadata.nameattributes.metadata.statusattributes.metadata.contactsattributes.metadata.homepageattributes.metadata.identifierattributes.metadata.descriptionattributes.metadata.support-linksattributes.metadata.year-creationattributes.metadata.data-processesattributes.legacy-idsattributes.fairsharing-registryattributes.record-typeattributes.subjectsattributes.domainsattributes.taxonomiesattributes.user-defined-tagsattributes.countriesattributes.nameattributes.abbreviationattributes.urlattributes.doiattributes.fairsharing-licenceattributes.descriptionattributes.publicationsattributes.licence-linksattributes.metadata.citationsattributes.metadata.abbreviationattributes.metadata.access-pointsattributes.metadata.associated-toolsattributes.metadata.deprecation-dateattributes.metadata.deprecation-reasonattributes.metadata.tombstone
01723fairsharing-records2014-11-04T15:23:40.000Z2021-09-30T11:39:06.829Z10.25504/FAIRsharing.8t18teCell Image Libraryready[{'contact-name': 'David Orloff', 'contact-ema...http://www.cellimagelibrary.org1723This library is a public and easily accessible...[{'url': 'http://www.cellimagelibrary.org/page...2010.0[{'name': 'live update', 'type': 'data release...[biodbcore-000180, bsg-d000180]Databaserepository[Cell Biology, Life Science][Cell, Microscopy, Light microscopy, Electron ...[All][][United States]FAIRsharing record for: Cell Image LibraryNonehttps://fairsharing.org/10.25504/FAIRsharing.8...10.25504/FAIRsharing.8t18tehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: This librar...[{'id': 232, 'pubmed_id': 23203874, 'title': '...[{'licence-name': 'Cell Image Library Data Pol...NaNNaNNaNNaNNaNNaNNaN
13101fairsharing-records2020-09-16T08:49:13.000Z2021-09-30T11:36:45.452ZNaNWHOI Ship Data-Grabber SystemreadyNaNhttp://4dgeo.whoi.edu/shipdata/SDG_shipdata.html3101The WHOI Ship DataGrabber system provides the ...[{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o...2004.0[{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai...[biodbcore-001609, bsg-d001609]Databaserepository[Earth Science, Water Research, Oceanography][][Not applicable][subseafloor environments][United States]FAIRsharing record for: WHOI Ship Data-Grabber...Nonehttps://fairsharing.org/fairsharing_records/3101Nonehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: The WHOI Sh...[][{'licence-name': 'NDSF Data Archive Policy', ...NaNNaNNaNNaNNaNNaNNaN
22649fairsharing-records2018-08-07T20:23:32.000Z2021-09-30T11:39:07.898ZNaNElectron Microscope Public Image Archiveready[{'contact-name': 'General contact', 'contact-...https://www.ebi.ac.uk/pdbe/emdb/empiar/2649EMPIAR, the Electron Microscopy Public Image A...[{'url': 'https://www.ebi.ac.uk/support/EMPIAR...2015.0[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...[biodbcore-001140, bsg-d001140]Databaserepository[Bioinformatics, Biology][Protein image, Microscopy, Electron microscop...[All][][Greece, Czech Republic, United Kingdom, Icela...FAIRsharing record for: Electron Microscope Pu...EMPIARhttps://fairsharing.org/fairsharing_records/2649Nonehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: EMPIAR, the...[{'id': 2232, 'pubmed_id': 27067018, 'title': ...[{'licence-name': 'EMBL-EBI Terms of Use', 'li...[{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27...EMPIAR[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...NaNNaNNaN
32657fairsharing-records2018-08-13T15:12:11.000Z2021-09-30T11:37:28.736Z10.25504/FAIRsharing.tnByoGClinicalStudyDataRequest.comready[{'contact-email': 'support@clinicalstudydatar...https://clinicalstudydatarequest.com/2657ClinicalStudyDataRequest.com (CSDR) is a conso...[{'url': 'https://clinicalstudydatarequest.com...2014.0[{'url': 'https://clinicalstudydatarequest.com...[biodbcore-001149, bsg-d001149]Databaserepository[Preclinical Studies, Biomedical Science][][Homo sapiens][][Worldwide]FAIRsharing record for: ClinicalStudyDataReque...CSDRhttps://fairsharing.org/10.25504/FAIRsharing.t...10.25504/FAIRsharing.tnByoGhttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: ClinicalStu...[][{'licence-name': 'CSDR Data Sharing Agreement...NaNCSDRNaNNaNNaNNaNNaN
42078fairsharing-records2014-11-04T15:23:40.000Z2021-09-30T11:34:43.129Z10.25504/FAIRsharing.3axym7Germplasm Resources Information Networkready[{'contact-email': 'dbmu@ars-grin.gov'}]https://www.ars-grin.gov/2078GRIN provides National Genetic Resources Progr...[{'url': 'https://www.ars-grin.gov/Pages/Colle...2010.0[{'url': 'https://www.ars-grin.gov/', 'name': ...[biodbcore-000546, bsg-d000546]Databaserepository[Life Science][Cell, Cell culture, Germplasm][Bacteria, Metazoa, Viridiplantae][][United States]FAIRsharing record for: Germplasm Resources In...GRINhttps://fairsharing.org/10.25504/FAIRsharing.3...10.25504/FAIRsharing.3axym7https://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: GRIN provid...[][]NaNGRINNaNNaNNaNNaNNaN
\n", "" ], "text/plain": [ " id type attributes.created-at \\\n", "0 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n", "1 3101 fairsharing-records 2020-09-16T08:49:13.000Z \n", "2 2649 fairsharing-records 2018-08-07T20:23:32.000Z \n", "3 2657 fairsharing-records 2018-08-13T15:12:11.000Z \n", "4 2078 fairsharing-records 2014-11-04T15:23:40.000Z \n", "\n", " attributes.updated-at attributes.metadata.doi \\\n", "0 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n", "1 2021-09-30T11:36:45.452Z NaN \n", "2 2021-09-30T11:39:07.898Z NaN \n", "3 2021-09-30T11:37:28.736Z 10.25504/FAIRsharing.tnByoG \n", "4 2021-09-30T11:34:43.129Z 10.25504/FAIRsharing.3axym7 \n", "\n", " attributes.metadata.name attributes.metadata.status \\\n", "0 Cell Image Library ready \n", "1 WHOI Ship Data-Grabber System ready \n", "2 Electron Microscope Public Image Archive ready \n", "3 ClinicalStudyDataRequest.com ready \n", "4 Germplasm Resources Information Network ready \n", "\n", " attributes.metadata.contacts \\\n", "0 [{'contact-name': 'David Orloff', 'contact-ema... \n", "1 NaN \n", "2 [{'contact-name': 'General contact', 'contact-... \n", "3 [{'contact-email': 'support@clinicalstudydatar... \n", "4 [{'contact-email': 'dbmu@ars-grin.gov'}] \n", "\n", " attributes.metadata.homepage \\\n", "0 http://www.cellimagelibrary.org \n", "1 http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html \n", "2 https://www.ebi.ac.uk/pdbe/emdb/empiar/ \n", "3 https://clinicalstudydatarequest.com/ \n", "4 https://www.ars-grin.gov/ \n", "\n", " attributes.metadata.identifier \\\n", "0 1723 \n", "1 3101 \n", "2 2649 \n", "3 2657 \n", "4 2078 \n", "\n", " attributes.metadata.description \\\n", "0 This library is a public and easily accessible... \n", "1 The WHOI Ship DataGrabber system provides the ... \n", "2 EMPIAR, the Electron Microscopy Public Image A... \n", "3 ClinicalStudyDataRequest.com (CSDR) is a conso... \n", "4 GRIN provides National Genetic Resources Progr... 
\n", "\n", " attributes.metadata.support-links \\\n", "0 [{'url': 'http://www.cellimagelibrary.org/page... \n", "1 [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... \n", "2 [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... \n", "3 [{'url': 'https://clinicalstudydatarequest.com... \n", "4 [{'url': 'https://www.ars-grin.gov/Pages/Colle... \n", "\n", " attributes.metadata.year-creation \\\n", "0 2010.0 \n", "1 2004.0 \n", "2 2015.0 \n", "3 2014.0 \n", "4 2010.0 \n", "\n", " attributes.metadata.data-processes \\\n", "0 [{'name': 'live update', 'type': 'data release... \n", "1 [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... \n", "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n", "3 [{'url': 'https://clinicalstudydatarequest.com... \n", "4 [{'url': 'https://www.ars-grin.gov/', 'name': ... \n", "\n", " attributes.legacy-ids attributes.fairsharing-registry \\\n", "0 [biodbcore-000180, bsg-d000180] Database \n", "1 [biodbcore-001609, bsg-d001609] Database \n", "2 [biodbcore-001140, bsg-d001140] Database \n", "3 [biodbcore-001149, bsg-d001149] Database \n", "4 [biodbcore-000546, bsg-d000546] Database \n", "\n", " attributes.record-type attributes.subjects \\\n", "0 repository [Cell Biology, Life Science] \n", "1 repository [Earth Science, Water Research, Oceanography] \n", "2 repository [Bioinformatics, Biology] \n", "3 repository [Preclinical Studies, Biomedical Science] \n", "4 repository [Life Science] \n", "\n", " attributes.domains \\\n", "0 [Cell, Microscopy, Light microscopy, Electron ... \n", "1 [] \n", "2 [Protein image, Microscopy, Electron microscop... 
\n", "3 [] \n", "4 [Cell, Cell culture, Germplasm] \n", "\n", " attributes.taxonomies attributes.user-defined-tags \\\n", "0 [All] [] \n", "1 [Not applicable] [subseafloor environments] \n", "2 [All] [] \n", "3 [Homo sapiens] [] \n", "4 [Bacteria, Metazoa, Viridiplantae] [] \n", "\n", " attributes.countries \\\n", "0 [United States] \n", "1 [United States] \n", "2 [Greece, Czech Republic, United Kingdom, Icela... \n", "3 [Worldwide] \n", "4 [United States] \n", "\n", " attributes.name attributes.abbreviation \\\n", "0 FAIRsharing record for: Cell Image Library None \n", "1 FAIRsharing record for: WHOI Ship Data-Grabber... None \n", "2 FAIRsharing record for: Electron Microscope Pu... EMPIAR \n", "3 FAIRsharing record for: ClinicalStudyDataReque... CSDR \n", "4 FAIRsharing record for: Germplasm Resources In... GRIN \n", "\n", " attributes.url \\\n", "0 https://fairsharing.org/10.25504/FAIRsharing.8... \n", "1 https://fairsharing.org/fairsharing_records/3101 \n", "2 https://fairsharing.org/fairsharing_records/2649 \n", "3 https://fairsharing.org/10.25504/FAIRsharing.t... \n", "4 https://fairsharing.org/10.25504/FAIRsharing.3... \n", "\n", " attributes.doi \\\n", "0 10.25504/FAIRsharing.8t18te \n", "1 None \n", "2 None \n", "3 10.25504/FAIRsharing.tnByoG \n", "4 10.25504/FAIRsharing.3axym7 \n", "\n", " attributes.fairsharing-licence \\\n", "0 https://creativecommons.org/licenses/by-sa/4.0... \n", "1 https://creativecommons.org/licenses/by-sa/4.0... \n", "2 https://creativecommons.org/licenses/by-sa/4.0... \n", "3 https://creativecommons.org/licenses/by-sa/4.0... \n", "4 https://creativecommons.org/licenses/by-sa/4.0... \n", "\n", " attributes.description \\\n", "0 This FAIRsharing record describes: This librar... \n", "1 This FAIRsharing record describes: The WHOI Sh... \n", "2 This FAIRsharing record describes: EMPIAR, the... \n", "3 This FAIRsharing record describes: ClinicalStu... \n", "4 This FAIRsharing record describes: GRIN provid... 
\n", "\n", " attributes.publications \\\n", "0 [{'id': 232, 'pubmed_id': 23203874, 'title': '... \n", "1 [] \n", "2 [{'id': 2232, 'pubmed_id': 27067018, 'title': ... \n", "3 [] \n", "4 [] \n", "\n", " attributes.licence-links \\\n", "0 [{'licence-name': 'Cell Image Library Data Pol... \n", "1 [{'licence-name': 'NDSF Data Archive Policy', ... \n", "2 [{'licence-name': 'EMBL-EBI Terms of Use', 'li... \n", "3 [{'licence-name': 'CSDR Data Sharing Agreement... \n", "4 [] \n", "\n", " attributes.metadata.citations \\\n", "0 NaN \n", "1 NaN \n", "2 [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... \n", "3 NaN \n", "4 NaN \n", "\n", " attributes.metadata.abbreviation \\\n", "0 NaN \n", "1 NaN \n", "2 EMPIAR \n", "3 CSDR \n", "4 GRIN \n", "\n", " attributes.metadata.access-points \\\n", "0 NaN \n", "1 NaN \n", "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n", "3 NaN \n", "4 NaN \n", "\n", " attributes.metadata.associated-tools \\\n", "0 NaN \n", "1 NaN \n", "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n", "3 NaN \n", "4 NaN \n", "\n", " attributes.metadata.deprecation-date attributes.metadata.deprecation-reason \\\n", "0 NaN NaN \n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " attributes.metadata.tombstone \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN " ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open('../data/raw/fairsharing_dump_api_09_2021.json') as f:\n", " lines = f.read().splitlines()\n", " \n", "fairsharing_df = pd.DataFrame(lines)\n", "fairsharing_df.columns = ['json_element']\n", "fairsharing_df['json_element'].apply(json.loads)\n", "fairsharing_df = pd.json_normalize(fairsharing_df['json_element'].apply(json.loads))\n", "\n", "fairsharing_df.head()" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtypeattributes.created-atattributes.updated-atattributes.metadata.doiattributes.metadata.nameattributes.metadata.statusattributes.metadata.contactsattributes.metadata.homepageattributes.metadata.identifierattributes.metadata.descriptionattributes.metadata.support-linksattributes.metadata.year-creationattributes.metadata.data-processesattributes.legacy-idsattributes.fairsharing-registryattributes.record-typeattributes.subjectsattributes.domainsattributes.taxonomiesattributes.user-defined-tagsattributes.countriesattributes.nameattributes.abbreviationattributes.urlattributes.doiattributes.fairsharing-licenceattributes.descriptionattributes.publicationsattributes.licence-linksattributes.metadata.citationsattributes.metadata.abbreviationattributes.metadata.access-pointsattributes.metadata.associated-toolsattributes.metadata.deprecation-dateattributes.metadata.deprecation-reasonattributes.metadata.tombstone
count1797179717971797135417971797167817971797.000000179716081492.0000001565179717971797179717971797179717971797163817971354179717971797179732616384496182172171
unique179711162179713541796415761797NaN17971594NaN1563179713888116337838418517961626179713541179711091082320162644461555861
top1723fairsharing-records2014-11-04T15:23:40.000Z2021-09-30T11:39:06.829Z10.25504/FAIRsharing.8t18teOmicsDBready[{'contact-name': 'Sam Hokin', 'contact-email'...http://www.cellimagelibrary.orgNaNThis library is a public and easily accessible...[{'url': 'https://github.com/gbif/ipt/wiki/IPT...NaN[{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea...[biodbcore-000180, bsg-d000180]Databaserepository[Life Science][][All][][United States]FAIRsharing record for: OmicsDBCGDhttps://fairsharing.org/10.25504/FAIRsharing.8...10.25504/FAIRsharing.8t18tehttps://creativecommons.org/licenses/by-sa/4.0...This FAIRsharing record describes: This librar...[][][{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31...CGD[{'url': 'https://github.com/Ensembl', 'name':...[{'url': 'http://www.h-invitational.jp/hinv/bl...2021-9-17This resource is no longer available at the st...True
freq11797636112154061NaN16NaN21179792635026550211935942311179716617166332841131
meanNaNNaNNaNNaNNaNNaNNaNNaNNaN2446.100167NaNNaN2007.636059NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
stdNaNNaNNaNNaNNaNNaNNaNNaNNaN520.058757NaNNaN10.953269NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
minNaNNaNNaNNaNNaNNaNNaNNaNNaN1547.000000NaNNaN1894.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
25%NaNNaNNaNNaNNaNNaNNaNNaNNaN1996.000000NaNNaN2004.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
50%NaNNaNNaNNaNNaNNaNNaNNaNNaN2445.000000NaNNaN2010.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
75%NaNNaNNaNNaNNaNNaNNaNNaNNaN2897.000000NaNNaN2014.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
maxNaNNaNNaNNaNNaNNaNNaNNaNNaN3346.000000NaNNaN2021.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " id type attributes.created-at \\\n", "count 1797 1797 1797 \n", "unique 1797 1 1162 \n", "top 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n", "freq 1 1797 636 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " attributes.updated-at attributes.metadata.doi \\\n", "count 1797 1354 \n", "unique 1797 1354 \n", "top 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n", "freq 1 1 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.metadata.name attributes.metadata.status \\\n", "count 1797 1797 \n", "unique 1796 4 \n", "top OmicsDB ready \n", "freq 2 1540 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.metadata.contacts \\\n", "count 1678 \n", "unique 1576 \n", "top [{'contact-name': 'Sam Hokin', 'contact-email'... \n", "freq 6 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.homepage attributes.metadata.identifier \\\n", "count 1797 1797.000000 \n", "unique 1797 NaN \n", "top http://www.cellimagelibrary.org NaN \n", "freq 1 NaN \n", "mean NaN 2446.100167 \n", "std NaN 520.058757 \n", "min NaN 1547.000000 \n", "25% NaN 1996.000000 \n", "50% NaN 2445.000000 \n", "75% NaN 2897.000000 \n", "max NaN 3346.000000 \n", "\n", " attributes.metadata.description \\\n", "count 1797 \n", "unique 1797 \n", "top This library is a public and easily accessible... \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.support-links \\\n", "count 1608 \n", "unique 1594 \n", "top [{'url': 'https://github.com/gbif/ipt/wiki/IPT... 
\n", "freq 6 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.year-creation \\\n", "count 1492.000000 \n", "unique NaN \n", "top NaN \n", "freq NaN \n", "mean 2007.636059 \n", "std 10.953269 \n", "min 1894.000000 \n", "25% 2004.000000 \n", "50% 2010.000000 \n", "75% 2014.000000 \n", "max 2021.000000 \n", "\n", " attributes.metadata.data-processes \\\n", "count 1565 \n", "unique 1563 \n", "top [{'url': 'http://qf.iodp.tamu.edu/qfsearch/sea... \n", "freq 2 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.legacy-ids attributes.fairsharing-registry \\\n", "count 1797 1797 \n", "unique 1797 1 \n", "top [biodbcore-000180, bsg-d000180] Database \n", "freq 1 1797 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.record-type attributes.subjects attributes.domains \\\n", "count 1797 1797 1797 \n", "unique 3 888 1163 \n", "top repository [Life Science] [] \n", "freq 926 350 265 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " attributes.taxonomies attributes.user-defined-tags \\\n", "count 1797 1797 \n", "unique 378 384 \n", "top [All] [] \n", "freq 502 1193 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.countries attributes.name \\\n", "count 1797 1797 \n", "unique 185 1796 \n", "top [United States] FAIRsharing record for: OmicsDB \n", "freq 594 2 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.abbreviation \\\n", "count 1638 \n", "unique 1626 \n", "top CGD \n", "freq 3 \n", "mean NaN \n", "std NaN 
\n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.url \\\n", "count 1797 \n", "unique 1797 \n", "top https://fairsharing.org/10.25504/FAIRsharing.8... \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.doi \\\n", "count 1354 \n", "unique 1354 \n", "top 10.25504/FAIRsharing.8t18te \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.fairsharing-licence \\\n", "count 1797 \n", "unique 1 \n", "top https://creativecommons.org/licenses/by-sa/4.0... \n", "freq 1797 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.description \\\n", "count 1797 \n", "unique 1797 \n", "top This FAIRsharing record describes: This librar... \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.publications attributes.licence-links \\\n", "count 1797 1797 \n", "unique 1109 1082 \n", "top [] [] \n", "freq 661 716 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " attributes.metadata.citations \\\n", "count 326 \n", "unique 320 \n", "top [{'doi': '10.1093/nar/gkz890', 'pubmed-id': 31... \n", "freq 6 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.abbreviation \\\n", "count 1638 \n", "unique 1626 \n", "top CGD \n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.access-points \\\n", "count 449 \n", "unique 444 \n", "top [{'url': 'https://github.com/Ensembl', 'name':... 
\n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.associated-tools \\\n", "count 618 \n", "unique 615 \n", "top [{'url': 'http://www.h-invitational.jp/hinv/bl... \n", "freq 2 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.deprecation-date \\\n", "count 217 \n", "unique 55 \n", "top 2021-9-17 \n", "freq 84 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.deprecation-reason \\\n", "count 217 \n", "unique 86 \n", "top This resource is no longer available at the st... \n", "freq 113 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " attributes.metadata.tombstone \n", "count 1 \n", "unique 1 \n", "top True \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN " ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df.describe(include='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Subjects analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**re3data**" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orgIdentifiersubject
0r3d1000000011 Humanities and Social Sciences
0r3d100000001111 Social Sciences
0r3d10000000111104 Political Science
0r3d100000001112 Economics
0r3d10000000112 Social and Behavioural Sciences
.........
2738r3d100013652102 History
2738r3d100013652105 Literary Studies
2738r3d100013652108 Philosophy
2738r3d10001365210801 History of Philosophy
2738r3d10001365211 Humanities
\n", "

16654 rows × 2 columns

\n", "
" ], "text/plain": [ " orgIdentifier subject\n", "0 r3d100000001 1 Humanities and Social Sciences\n", "0 r3d100000001 111 Social Sciences\n", "0 r3d100000001 11104 Political Science\n", "0 r3d100000001 112 Economics\n", "0 r3d100000001 12 Social and Behavioural Sciences\n", "... ... ...\n", "2738 r3d100013652 102 History\n", "2738 r3d100013652 105 Literary Studies\n", "2738 r3d100013652 108 Philosophy\n", "2738 r3d100013652 10801 History of Philosophy\n", "2738 r3d100013652 11 Humanities\n", "\n", "[16654 rows x 2 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_subjects = re3data_df[['orgIdentifier', 'subject']].explode('subject')\n", "re3data_subjects['subject'] = re3data_subjects['subject'].apply(lambda x: x['name'] if x is not np.nan else np.nan)\n", "re3data_subjects" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "name": "re3data tier 1-digits", "type": "bar", "x": [ "4 Engineering Sciences", "3 Natural Sciences", "2 Life Sciences", "1 Humanities and Social Sciences" ], "y": [ 525, 1366, 1491, 988 ] }, { "name": "re3data tier 2-digits", "type": "bar", "x": [ "45 Construction Engineering and Architecture", "44 Computer Science, Electrical and System Engineering", "43 Materials Science and Engineering", "42 Thermal Engineering/Process Engineering", "41 Mechanical and industrial Engineering", "34 Geosciences (including Geography)", "33 Mathematics", "32 Physics", "31 Chemistry", "23 Agriculture, Forestry, Horticulture and Veterinary Medicine", "22 Medicine", "21 Biology", "12 Social and Behavioural Sciences", "11 Humanities" ], "y": [ 49, 152, 41, 18, 11, 783, 33, 314, 223, 198, 638, 904, 452, 318 ] }, { "name": "re3data tier 3-digits", "type": "bar", "x": [ "410 
Construction Engineering and Architecture", "409 Computer Science", "408 Electrical Engineering", "407 Systems Engineering", "406 Materials Science", "405 Materials Engineering", "404 Heat Energy Technology, Thermal Machines, Fluid Mechanics", "403 Process Engineering, Technical Chemistry", "402 Mechanics and Constructive Mechanical Engineering", "318 Water Research", "317 Geography", "316 Geochemistry, Mineralogy and Crystallography", "315 Geophysics and Geodesy", "314 Geology and Palaeontology", "313 Atmospheric Science and Oceanography", "312 Mathematics", "311 Astrophysics and Astronomy", "310 Statistical Physics, Soft Matter, Biological Physics, Nonlinear Dynamics", "309 Particles, Nuclei and Fields", "308 Optics, Quantum Optics and Physics of Atoms, Molecules and Plasmas", "307 Condensed Matter Physics", "306 Polymer Research", "305 Biological Chemistry and Food Chemistry", "304 Analytical Chemistry, Method Development (Chemistry)", "303 Physical and Theoretical Chemistry", "302 Chemical Solid State and Surface Research", "301 Molecular Chemistry", "207 Agriculture, Forestry, Horticulture and Veterinary Medicine", "206 Neurosciences", "205 Medicine", "204 Microbiology, Virology and Immunology", "203 Zoology", "202 Plant Sciences", "201 Basic Biological and Medical Research", "113 Jurisprudence", "112 Economics", "111 Social Sciences", "110 Psychology", "109 Education Sciences", "108 Philosophy", "107 Theology", "106 Non-European Languages and Cultures, Social and Cultural Anthropology, Jewish Studies and Religious Studies", "105 Literary Studies", "104 Linguistics", "103 Fine Arts, Music, Theatre and Media Studies", "102 History", "101 Ancient Cultures" ], "y": [ 41, 92, 16, 24, 25, 9, 9, 10, 6, 148, 162, 91, 265, 81, 412, 7, 179, 5, 51, 75, 25, 4, 22, 22, 33, 22, 44, 131, 86, 404, 218, 245, 217, 524, 51, 213, 294, 38, 64, 9, 22, 45, 29, 107, 76, 109, 69 ] }, { "name": "re3data tier 5-digits", "type": "bar", "x": [ "41006 Geotechnics, Hydraulic Engineering", 
"41004 Sructural Engineering, Building Informatics, Construction Operation", "41003 Construction Material Sciences, Chemistry, Building Physics", "41002 Urbanism, Spatial Planning, Transportation and Infrastructure Planning, Landscape Planning", "41001 Architecture, Building and Construction History, Sustainable Building Technology, Building Design", "40904 Artificial Intelligence, Image and Language Processing", "40903 Operating, Communication and Information Systems", "40902 Software Technology", "40901 Theoretical Computer Science", "40803 Electrical Energy Generation, Distribution, Application", "40802 Communication, High-Frequency and Network Technology, Theoretical Electrical Engineering", "40801 Electronic Semiconductors, Components, Circuits, Systems", "40705 Human Factors, Ergonomics, Human-Machine Systems", "40704 Traffic and Transport Systems, Logistics", "40702 Measurement Systems", "40701 Automation, Control Systems, Robotics, Mechatronics", "40605 Biomaterials", "40603 Microstructural Mechanical Properties of Materials", "40601 Thermodynamics and Kinetics of Materials", "40503 Composite Materials", "40502 Sintered Metallic and Ceramic Materials", "40501 Metallurgical and Thermal Processes, Thermomechanical Treatment of Materials", "40402 Technical Thermodynamics", "40401 Energy Process Engineering", "40304 Biological Process Engineering", "40302 Technical Chemistry", "40301 Chemical and Thermal Process Engineering", "40204 Acoustics", "31801 Hydrogeology, Hydrology, Limnology, Urban Water Management, Water Chemistry, Integrated Water Resources Management", "31702 Human Geography", "31701 Physical Geography", "31601 Geochemistry, Mineralogy and Crystallography", "31502 Geodesy, Photogrammetry, Remote Sensing, Geoinformatics, Cartogaphy", "31501 Geophysics", "31401 Geology and Palaeontology", "31302 Oceanography", "31301 Atmospheric Science", "31201 Mathematics", "31101 Astrophysics and Astronomy", "31001 Statistical Physics, Soft Matter, Biological 
Physics, Nonlinear Dynamics", "30901 Particles, Nuclei and Fields", "30801 Optics, Quantum Optics, Atoms, Molecules, Plasmas", "30702 Theoretical Condensed Matter Physics", "30701 Experimental Condensed Matter Physics", "30603 Polymer Materials", "30602 Experimental and Theoretical Physics of Polymers", "30601 Preparatory and Physical Chemistry of Polymers", "30502 Food Chemistry", "30501 Biological and Biomimetic Chemistry", "30401 Analytical Chemistry, Method Development (Chemistry)", "30302 General Theoretical Chemistry", "30301 Physical Chemistry of Molecules, Interfaces and Liquids - Spectroscopy, Kinetics", "30203 Theory and Modelling", "30202 Physical Chemistry of Solids and Surfaces, Material Characterisation", "30201 Solid State and Surface Chemistry, Material Synthesis", "30102 Organic Molecular Chemistry", "30101 Inorganic Molecular Chemistry", "20714 Basic Research on Pathogenesis, Diagnostics and Therapy and Clinical Veterinary Medicine", "20713 Basic Veterinary Medical Science", "20711 Animal Husbandry, Breeding and Hygiene", "20710 Basic Forest Research", "20709 Inventory Control and Use of Forest Resources", "20708 Agricultural Economics and Sociology", "20707 Agricultural and Food Process Engineering", "20705 Plant Breeding", "20704 Ecology of Agricultural Landscapes", "20703 Plant Nutrition", "20702 Plant Cultivation", "20701 Soil Sciences", "20611 Clinical Neurosciences III - Ophthalmology", "20609 Biological Psychiatry", "20608 Clinical Neurosciences I - Neurology, Neurosurgery", "20606 Cognitive Neuroscience and Neuroimaging", "20605 Comparative Neurobiology", "20604 Systemic Neuroscience, Computational Neuroscience, Behaviour", "20603 Developmental Neurobiology", "20602 Cellular Neuroscience", "20601 Molecular Neuroscience and Neurogenetics", "20532 Biomedical Technology and Medical Physics", "20531 Radiation Oncology and Radiobiology", "20530 Radiology and Nuclear Medicine", "20528 Dentistry, Oral Surgery", "20527 Traumatology and 
Orthopaedics", "20526 Cardiothoracic Surgery", "20524 Gerontology and Geriatric Medicine", "20523 Urology", "20522 Reproductive Medicine/Biology", "20521 Gynaecology and Obstetrics", "20520 Pediatric and Adolescent Medicine", "20519 Dermatology", "20518 Rheumatology, Clinical Immunology, Allergology", "20517 Endocrinology, Diabetology", "20515 Gastroenterology, Metabolism", "20514 Hematology, Oncology, Transfusion Medicine", "20513 Pneumology, Clinical Infectiology Intensive Care Medicine", "20512 Cardiology, Angiology", "20510 Toxicology and Occupational Medicine", "20509 Pharmacology", "20508 Pharmacy", "20507 Clinical Chemistry and Pathobiochemistry", "20506 Pathology and Forensic Medicine", "20505 Nutritional Sciences", "20504 Physiology", "20503 Human Genetics", "20502 Public Health, Health Services Research, Social Medicine", "20501 Epidemiology, Medical Biometry, Medical Informatics", "20405 Immunology", "20404 Virology", "20403 Medical Microbiology, Molecular Infection Biology", "20402 Microbial Ecology and Applied Microbiology", "20401 Metabolism, Biochemistry and Genetics of Microorganisms", "20306 Animal Genetics, Cell and Developmental Biology", "20305 Biochemistry and Animal Physiology", "20304 Sensory and Behavioural Biology", "20303 Animal Ecology, Biodiversity and Ecosystem Research", "20302 Evolution, Anthropology", "20301 Systematics and Morphology", "20207 Plant Genetics", "20206 Plant Cell and Developmental Biology", "20205 Plant Biochemistry and Biophysics", "20204 Plant Physiology", "20203 Inter-organismic Interactions of Plants", "20202 Plant Ecology and Ecosystem Analysis", "20201 Plant Systematics and Evolution", "20108 Anatomy", "20107 Bioinformatics and Theoretical Biology", "20106 Developmental Biology", "20105 General Genetics", "20104 Structural Biology", "20103 Cell Biology", "20102 Biophysics", "20101 Biochemistry", "11305 Criminology", "11304 Criminal Law and Law of Criminal Procedure", "11303 Public Law", "11302 Private Law", 
"11301 Legal and Political Philosophy, Legal History, Legal Theory", "11206 Economic and Social History", "11205 Statistics and Econometrics", "11204 Business Administration", "11203 Public Finance", "11202 Economic and Social Policy", "11201 Economic Theory", "11104 Political Science", "11103 Communication Science", "11102 Empirical Social Research", "11101 Sociological Theory", "11004 Differential Psychology, Clinical Psychology, Medical Psychology, Methodology", "11003 Social Psychology, Industrial and Organisational Psychology", "11002 Developmental and Educational Psychology", "11001 General, Biological and Mathematical Psychology", "10903 Research on Socialization and Educational Institutions and Professions", "10902 Research on Teaching, Learning and Training", "10901 General Education and History of Education", "10801 History of Philosophy", "10702 Roman Catholic Theology", "10701 Protestant Theology", "10605 Religious Studies and Jewish Studies", "10604 Islamic Studies, Arabian Studies, Semitic Studies", "10603 African, American and Oceania Studies", "10602 Asian Studies", "10601 Social and Cultural Anthropology and Ethnology/Folklore", "10504 General and Comparative Literature and Cultural Studies", "10503 European and American Literature", "10501 Medieval German Literature", "10403 Typology, Non-European Languages, Historical Linguistics", "10402 Individual Linguistics", "10401 General and Applied Linguistics", "10303 Theatre and Media Studies", "10302 Musicology", "10301 Art History", "10204 History of Science", "10203 Modern and Current History", "10202 Early Modern History", "10201 Medieval History", "10105 Egyptology and Ancient Near Eastern Studies", "10104 Classical Archaeology", "10103 Ancient History", "10102 Classical Philology", "10101 Prehistory" ], "y": [ 3, 4, 1, 17, 16, 24, 4, 10, 1, 7, 3, 1, 5, 13, 1, 2, 2, 1, 2, 2, 2, 1, 1, 3, 2, 1, 1, 6, 50, 27, 23, 19, 100, 66, 17, 173, 132, 1, 30, 1, 11, 21, 1, 3, 2, 1, 2, 3, 5, 4, 4, 23, 6, 8, 5, 9, 
6, 4, 7, 4, 23, 13, 18, 7, 9, 32, 3, 8, 35, 3, 1, 1, 17, 1, 9, 1, 1, 6, 17, 5, 9, 2, 1, 1, 3, 1, 2, 5, 10, 2, 3, 4, 7, 16, 5, 2, 12, 25, 13, 4, 7, 4, 11, 117, 135, 59, 22, 19, 17, 9, 38, 93, 9, 5, 84, 21, 14, 67, 4, 11, 4, 1, 71, 16, 13, 178, 4, 222, 45, 85, 11, 72, 6, 2, 8, 1, 9, 12, 65, 16, 15, 51, 2, 58, 14, 103, 3, 4, 3, 3, 2, 9, 10, 3, 2, 3, 3, 9, 4, 5, 3, 19, 2, 3, 1, 8, 4, 4, 5, 18, 20, 10, 18, 7, 8, 7, 15, 6, 1, 7 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 
0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { 
"marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, 
"#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", 
"linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Subject coverage re3data" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Count non-null orgIdentifier values per re3data subject label, ordered by label (descending).\n",
"data = re3data_subjects.groupby('subject')[['orgIdentifier']].count().sort_values('subject', ascending=False)\n",
"\n",
"# One bar trace per tier; a tier is the number of leading digits in the numeric code of a subject label.\n",
"plot = []\n",
"for tier in [1, 2, 3, 5]:\n",
"    # Raw string keeps the regex escapes out of Python string-escape handling.\n",
"    # The rows are selected once and reused for both axes.\n",
"    tier_rows = data[data.index.str.contains(r'^\\d{%s}\\s' % tier, regex=True)]\n",
"    plot.append(\n",
"        go.Bar(\n",
"            x=tier_rows.index,\n",
"            y=tier_rows['orgIdentifier'],\n",
"            name='re3data tier %s-digits' % tier\n",
"        )\n",
"    )\n",
"\n",
"layout = go.Layout(\n",
"    title='Subject coverage re3data',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"# Figure.show() returns None, so build the figure first and keep it in fig before rendering.\n",
"fig = go.Figure(plot, layout)\n",
"fig.show()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**OpenDOAR**" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "opendoar_subjects = opendoar_df.explode('repository_metadata.content_subjects')" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "type": "bar", "x": [ "multidisciplinary", "health and medicine", "science general", "technology general", "business and economics", "social sciences general", "law and politics", "history and archaeology", "arts and humanities general", "education", "ecology and environment", "agriculture, food and veterinary", "biology and biochemistry", "computers and it", "geography and regional studies", "language and literature", "philosophy and religion", "mathematics and statistics", "library and information science", "fine and performing arts", "chemistry and chemical technology", "physics and astronomy", "earth and planetary sciences", "management and planning", "mechanical engineering and materials", "psychology", "electrical and electronic engineering", "civil engineering", "architecture" ], "y": [ 3374, 545, 381, 378, 334, 333, 304, 287, 265, 247, 222, 217, 207, 201, 197, 170, 169, 145, 143, 137,
131, 129, 120, 119, 108, 84, 82, 77, 74 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 
0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": 
"scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 
0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Subject coverage OpenDOAR" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, 
"text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
"# Count non-null system_metadata.id values per OpenDOAR content subject, largest count first.\n",
"data = (\n",
"    opendoar_subjects\n",
"    .groupby('repository_metadata.content_subjects')[['system_metadata.id']]\n",
"    .count()\n",
"    .sort_values('system_metadata.id', ascending=False)\n",
")\n",
"\n",
"# Single bar trace: one bar per content subject.\n",
"plot = [\n",
"    go.Bar(\n",
"        x=data.index,\n",
"        y=data['system_metadata.id'],\n",
"    )\n",
"]\n",
"\n",
"layout = go.Layout(\n",
"    title='Subject coverage OpenDOAR',\n",
"    xaxis=dict(tickangle=45, tickfont=dict(size=12))\n",
")\n",
"\n",
"# Figure.show() returns None, so build the figure first and keep it in fig before rendering.\n",
"fig = go.Figure(plot, layout)\n",
"fig.show()"
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**ROAR**" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [], "source": [ "roar_subjects = roar_df.explode('subjects')" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "type": "bar", "x": [ "L1", "H1", "Q1", "T1", "K1", "S1", "R1", "HB", "TA", "B1", "AC", "QD", "JA", "Z665", "AS", "HG", "QC", "TJ", "BF", "QA", "AI", "TK", "HM", "Z719", "HA", "SB", "TD", "P1", "TP", "G1", "QK", "D1", "RT", "HN", "QA75", "SF", "BL", "QA76", "HJ", "QE", "LB", "HC", "SH", "TH", "GE", "QR", "SD", "QL", "AZ", "HF", "ZA", "ZA4050", "N1", "RS", "LB2300", "HD", "NX", "RA", "RK", "RZ", "HE", "TC", "PE", "M1", "HF5601", "Z004", "PN", "RB", "JF", "HD28", "TG", "J1", "JZ", "GN", "RM", "AM", "HT", "QH301", "TL", "QP", "RG", "RD", "RA0421", "QH", "JC", "QB", "TN", "BJ", "RC", "HV", "ZA4450", "TS", "LC", "QM", "TX", "JX", "C1", "RJ", "HQ", "RE", "BP", "E11", "TE", "QR355", "QR180", "TF", "JS", "LA", "F1201", "RF", "CC", "RL", "ML", "LB2361", "T201", "HD61", "NC", "LF", "BR", "MT", "ND", "BC", "QH426", "DS", "LT", "NE", "GF", "GC", "LG", "LB1501", "PR", "GR", "CT", "BD", "LE", "NB", "HX", "LB1603", "GB", "BH", "RX", "GA", "RV", "CB", "KZ", "NK", "RC0321", "BT", "SK", "RC0254", "BV", "RJ101", "PL", "TR", "RC1200", "U1", "D051", "BS", "TT", "DP", "LC5201", "JQ", "DK",
"PA", "RA1001", "D901", "PC", "D204", "JV", "PB", "D111", "D880", "PD", "GT", "V1", "F001", "DE", "HS", "DR", "JN", "PS", "VM", "PG", "BV1460", "CD", "LD", "D839", "BX", "D731", "JL", "BQ", "GV", "D501", "PQ", "DT", "E151", "JK", "PF", "BM", "PI", "CD921", "PT", "PN0080", "D890", "DC", "CS", "DF", "PZ", "DD", "DAW", "F1001", "KF", "DA", "KD", "PN0441", "CN", "PN2000", "DU", "CR", "PJ", "PN1990", "DG", "DH", "DL", "DJK", "CJ", "PH", "PK", "DJ", "CE", "DB", "PM", "JN1187", "PN1993", "JN101", "PB1501", "KDC" ], "y": [ 348, 342, 231, 220, 214, 191, 190, 163, 161, 137, 131, 130, 129, 128, 116, 115, 114, 107, 106, 106, 105, 105, 99, 93, 85, 85, 83, 83, 82, 81, 80, 80, 80, 79, 78, 78, 77, 75, 74, 73, 73, 72, 70, 69, 68, 67, 64, 64, 63, 62, 60, 59, 58, 57, 55, 54, 53, 53, 52, 52, 52, 51, 50, 49, 49, 49, 47, 46, 45, 45, 45, 45, 45, 45, 44, 44, 43, 43, 43, 42, 42, 42, 42, 41, 41, 41, 40, 40, 39, 39, 39, 39, 38, 38, 37, 37, 37, 36, 36, 35, 35, 34, 34, 34, 33, 33, 32, 32, 32, 32, 32, 31, 31, 31, 31, 31, 30, 29, 29, 29, 29, 29, 28, 28, 28, 27, 27, 27, 27, 27, 27, 26, 26, 26, 26, 25, 25, 25, 24, 24, 24, 24, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 19, 19, 19, 18, 18, 18, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 6, 6 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": 
"white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 
0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 
0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": 
"white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Subject coverage OpenDOAR" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = roar_subjects.groupby('subjects')[['eprintid']].count().sort_values('eprintid', ascending=False)\n", "plot = [\n", " go.Bar(\n", " x=data.index,\n", " y=data['eprintid'],\n", " ) \n", "] \n", "\n", "layout = go.Layout(\n", " title='Subject coverage OpenDOAR',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "\n", "fig = go.Figure(plot, layout).show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**FAIRsharing**" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "fairsharing_subjects = fairsharing_df.explode('attributes.subjects')" ] }, { "cell_type": "code", "execution_count": 61, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "name": "FAIRsharing", "type": "bar", "x": [ "Life Science", "Biomedical Science", "Earth Science", "Genomics", "Environmental Science", "Oceanography", "Epidemiology", "Biology", "Biodiversity", "Atmospheric Science", "Genetics", "Health Science", "Virology", "Proteomics", "Bioinformatics", "Agriculture", "Geology", "Transcriptomics", "Clinical Studies", "Humanities and Social Sciences", "Preclinical Studies", "Chemistry", "Comparative Genomics", "Botany", "Data Management", "Medicine", "Natural Science", "Functional Genomics", "Meteorology", "Geophysics", "Systems Biology", "Social Science", "Ecology", "Engineering Science", "Geography", "Data Submission, Annotation and Curation", "Metabolomics", "Marine Biology", "Economics", "Physics", "Biochemistry", "Ontology and Terminology", "Hydrology", "Astrophysics and Astronomy", "Molecular biology", "Infectious Disease Medicine", "Medical Virology", "Computational Biology", "Neurobiology", "Phylogenetics", "Structural Biology", "Humanities", "Developmental Biology", "Computer Science", "Immunology", "Geochemistry", "Anatomy", "Social and Behavioural 
Science", "Microbiology", "Remote Sensing", "Epigenetics", "Critical Care Medicine", "Knowledge and Information Systems", "Public Health", "Plant Genetics", "Data Visualization", "Ecosystem Science", "Neuroscience", "Demographics", "Forest Management", "Data Integration", "Oncology", "Cell Biology", "Drug Discovery", "Materials Science", "Metagenomics", "Hydrogeology", "Soil Science", "Glycomics", "Population Genetics", "Plant Breeding", "Paleontology", "Water Research", "Immunogenetics", "Geodesy", "Taxonomy", "Water Management", "Energy Engineering", "Human Genetics", "Software Engineering", "Mineralogy", "Pharmacology", "Phylogenomics", "Computational Neuroscience", "Freshwater Science", "Global Health", "Medical Informatics", "Epigenomics", "Animal Genetics", "Statistics", "Evolutionary Biology", "Translational Medicine", "Psychology", "Culture", "Political Science", "Cheminformatics", "Phylogeny", "Nutritional Science", "Microbial Ecology", "Mathematics", "Phenomics", "Natural History", "Physical Geography", "Population Dynamics", "Nanotechnology", "Zoology", "Drug Development", "Education Science", "Enzymology", "Analytical Chemistry", "Classical Archaeology", "Neurophysiology", "Informatics", "Omics", "Data Governance", "Organic Chemistry", "Pathology", "Database Management", "Biotechnology", "History", "Food Security", "Maritime Engineering", "Plant Anatomy", "Bioengineering", "Urban Planning", "Cartography", "Art", "Civil Engineering", "Architecture", "Materials Engineering", "Geoinformatics", "Anthropology", "Animal Husbandry", "Molecular Chemistry", "Fine Arts", "Aerospace Engineering", "Business Administration", "Tropical Medicine", "Communication Science", "Cardiology", "Molecular Genetics", "Endocrinology", "Criminology", "Chemical Engineering", "Transportation Planning", "Linguistics", "Fisheries Science", "Thermodynamics", "Computational Chemistry", "Agronomy", "Toxicology", "Economic and Social History", "Respiratory Medicine", "Physiology", 
"Medicinal Chemistry", "Art History", "Geriatric Medicine", "Primary Health Care", "Synthetic Biology", "Reproductive Health", "Molecular Microbiology", "Aquaculture", "Pediatrics", "Pharmacogenomics", "Agricultural Engineering", "Prehistory", "Plant Ecology", "Physical Chemistry", "Agroecology", "Pharmacy", "Toxicogenomics", "Entomology", "Neurology", "Embryology", "Data Quality", "Geotechnics", "Drug Repositioning", "Data Mining", "Drug Metabolism", "Materials Informatics", "Inorganic Molecular Chemistry", "Health Services Research", "Construction Engineering", "Human Biology", "Structural Genomics", "Occupational Medicine", "Criminal Law", "Limnology", "Ancient History", "Jurisprudence", "Research on Teaching, Learning and Training", "Developmental Neurobiology", "Building Engineering Physics", "Cultural Studies", "Industrial Engineering", "Systemic Neuroscience", "Biomaterials", "Building Design", "Animal Physiology", "Rural and Agricultural Sociology", "Social Psychology", "Data Security", "Social Policy", "Synthetic Chemistry", "Classical Philology", "Philosophy", "Applied Microbiology", "Agricultural Economics", "Media Studies", "Composite Materials", "Medical Physics", "Power Engineering", "Veterinary Medicine", "Farming Systems Research", "Community Care", "Proteogenomics", "Digital Image Processing", "Public Finance", "Electrical Engineering", "Public Law", "Quantitative Genetics", "Historical Linguistics", "Molecular Dynamics", "Molecular Infection Biology", "Gastroenterology", "Ancient Cultures", "Molecular Physical Chemistry", "Human Geography", "Religious Studies", "Radiology", "Regenerative Medicine", "Rheumatology", "Applied Mathematics", "Safety Science", "Mechanical Engineering", "Social Medicine", "Theology", "Empirical Social Research", "Agricultural Law", "Food Process Engineering", "Forensic Medicine", "Electrophysiology", "Traditional Medicine", "Egyptology", "Functional Materials Research", "Theoretical Chemistry", "Telecommunication 
Engineering", "Applied Linguistics", "Technical Chemistry", "Economic Theory", "Synthesis Chemistry", "Surgery", "Structural Engineering", "Process Engineering", "Solid-State Chemistry", "Economic Policy", "Econometrics", "Animal Breeding", "Horticulture", "Gynecology", "Clinical Psychology", "Neurogenetics", "Chemical Biology", "Literary Studies", "Logistics Engineering", "Musculoskeletal Medicine", "Molecular Neuroscience", "Molecular Medicine", "Computer Architecture", "Modern History", "Clinical Chemistry", "Microstructural Mechanical Properties of Materials", "Microbial Physiology", "Cellular Neuroscience", "Microbial Genetics", "Clinical Veterinary Medicine", "Cognitive Neuroscience", "Metal-Cutting Manufacturing Engineering", "Materials Structuring and Functionalisation", "Medicines Research and Development", "Comparative Neurobiology", "Mechanical Behaviour of Construction Materials", "Component Engineering", "Medical Microbiology", "Mechanics", "Landscape Planning", "Nuclear Medicine", "Hematology", "Biological Psychology", "Artificial Intelligence", "Polymer Chemistry", "Atomic, Molecular, Optical and Plasma Physics", "Policy", "Plastics Engineering", "Behavioural Biology", "Plant Cultivation", "Plant Cell Biology", "History of Science", "Mechanical Process Engineering", "Biological Process Engineering", "Dermatology", "Obstetrics", "Biomimetic Chemistry", "Biophysics", "Photogrammetry", "Human-Machine Systems Engineering", "Biotherapeutics", "Personalized Medicine", "Hydraulic Engineering", "Particles, Nuclei and Fields", "Parasitology", "Organic Molecular Chemistry", "Ophthalmology", "Acoustics" ], "y": [ 864, 253, 230, 180, 136, 97, 94, 86, 82, 78, 75, 74, 68, 66, 61, 59, 52, 51, 48, 48, 47, 47, 46, 45, 45, 43, 43, 42, 42, 41, 40, 39, 35, 34, 33, 32, 31, 29, 28, 27, 26, 25, 25, 25, 23, 22, 21, 21, 21, 20, 20, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 12, 12, 12, 12, 
12, 12, 11, 11, 11, 11, 10, 10, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, 
"ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": 
"pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, 
"#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": 
"white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Subject coverage FAIRsharing" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data = fairsharing_subjects.groupby('attributes.subjects')[['id']].count().sort_values('id', ascending=False)\n", "plot = [\n", " go.Bar(\n", " x=data.index,\n", " y=data['id'],\n", " name='FAIRsharing'\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Subject coverage FAIRsharing',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "\n", "fig = go.Figure(plot, layout).show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Geographic analysis" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**re3data**" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
re3data_idinstitutionorg_nameorg_other_namesorg_countryorg_3org_noprofitorg_urlorg_idsorg_date1org_date2org_contact
0r3d100000001[Odum Institute for Research in Social Science...Odum Institute for Research in Social Science[]USA[general]non-profithttps://odum.unc.edu/archive/[][]
1r3d100000002[The U.S. National Archives and Records Admini...The U.S. National Archives and Records Adminis...[NARA, National Archives]USA[general]non-profithttp://www.archives.gov/[][http://www.archives.gov/contact/]
2r3d100000002[The USA.gov, [], USA, [general], non-profit, ...The USA.gov[]USA[general]non-profithttp://www.usa.gov/[][http://www.usa.gov/Contact.shtml]
3r3d100000004[Institut für Deutsche Sprache, Archiv für Ges...Institut für Deutsche Sprache, Archiv für Gesp...[AGD]DEU[funding, general]non-profithttp://agd.ids-mannheim.de/index.shtml[]2004[agd@ids-mannheim.de]
4r3d100000005[Odum Institute for Research in Social Science...Odum Institute for Research in Social Science[]USA[technical]non-profithttps://odum.unc.edu/[][https://odum.unc.edu/contact/contact-form/, o...
\n", "
" ], "text/plain": [ " re3data_id institution \\\n", "0 r3d100000001 [Odum Institute for Research in Social Science... \n", "1 r3d100000002 [The U.S. National Archives and Records Admini... \n", "2 r3d100000002 [The USA.gov, [], USA, [general], non-profit, ... \n", "3 r3d100000004 [Institut für Deutsche Sprache, Archiv für Ges... \n", "4 r3d100000005 [Odum Institute for Research in Social Science... \n", "\n", " org_name \\\n", "0 Odum Institute for Research in Social Science \n", "1 The U.S. National Archives and Records Adminis... \n", "2 The USA.gov \n", "3 Institut für Deutsche Sprache, Archiv für Gesp... \n", "4 Odum Institute for Research in Social Science \n", "\n", " org_other_names org_country org_3 org_noprofit \\\n", "0 [] USA [general] non-profit \n", "1 [NARA, National Archives] USA [general] non-profit \n", "2 [] USA [general] non-profit \n", "3 [AGD] DEU [funding, general] non-profit \n", "4 [] USA [technical] non-profit \n", "\n", " org_url org_ids org_date1 org_date2 \\\n", "0 https://odum.unc.edu/archive/ [] \n", "1 http://www.archives.gov/ [] \n", "2 http://www.usa.gov/ [] \n", "3 http://agd.ids-mannheim.de/index.shtml [] 2004 \n", "4 https://odum.unc.edu/ [] \n", "\n", " org_contact \n", "0 [] \n", "1 [http://www.archives.gov/contact/] \n", "2 [http://www.usa.gov/Contact.shtml] \n", "3 [agd@ids-mannheim.de] \n", "4 [https://odum.unc.edu/contact/contact-form/, o... 
" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_institutions = re3data_df.explode('institution')[['re3data_id', 'institution']]\n", "re3data_institutions = re3data_institutions[~re3data_institutions.institution.isna()].reset_index(drop=True)\n", "re3data_institutions = re3data_institutions.join(pd.DataFrame(re3data_institutions.institution.to_list(), columns=['org_name', 'org_other_names', \n", " 'org_country', 'org_3', 'org_noprofit', \n", " 'org_url', 'org_ids', 'org_date1', \n", " 'org_date2', 'org_contact']))\n", "re3data_institutions.head()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "re3data_institutions['org_continent'] = re3data_institutions.org_country.map(countrycode_to_continent)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['AAA', 'EEC'], dtype=object)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_institutions[re3data_institutions.org_continent.isna()].org_country.unique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "AAA is used for international collaborations; we skip this.\n", "EEC is used for the EU commission; we fix the continent manually." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "re3data_institutions.loc[re3data_institutions.org_country == 'EEC', 'org_continent'] = 'EU'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**OpenDOAR**" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
opendoar_idinstitutionorg_nameorg_other_namesorg_countryorg_3org_noprofitorg_urlorg_idsorg_date1org_date2org_contact
0101[university of utrecht, [universiteit utrecht]...university of utrecht[universiteit utrecht]NLD[]https://www.uu.nl[https://ror.org/04pp8hn57][]
1115[indian institute of management kozhikode, [ii...indian institute of management kozhikode[iimk]IND[]http://www.iimk.ac.in/[https://ror.org/03m1xdc36][]
241[california institute of technology, [caltech]...california institute of technology[caltech]USA[]http://www.caltech.edu/[https://ror.org/05dxps055][]
3119[dublin city university, [dcu], ie, [], , http...dublin city university[dcu]IRL[]http://www.dcu.ie/[https://ror.org/04a1a1e81][]
4129[istituto nazionale di geofisica e vulcanologi...istituto nazionale di geofisica e vulcanologia[ingv]ITA[]http://www.ingv.it[https://ror.org/00qps9a02][]
\n", "
" ], "text/plain": [ " opendoar_id institution \\\n", "0 101 [university of utrecht, [universiteit utrecht]... \n", "1 115 [indian institute of management kozhikode, [ii... \n", "2 41 [california institute of technology, [caltech]... \n", "3 119 [dublin city university, [dcu], ie, [], , http... \n", "4 129 [istituto nazionale di geofisica e vulcanologi... \n", "\n", " org_name org_other_names \\\n", "0 university of utrecht [universiteit utrecht] \n", "1 indian institute of management kozhikode [iimk] \n", "2 california institute of technology [caltech] \n", "3 dublin city university [dcu] \n", "4 istituto nazionale di geofisica e vulcanologia [ingv] \n", "\n", " org_country org_3 org_noprofit org_url \\\n", "0 NLD [] https://www.uu.nl \n", "1 IND [] http://www.iimk.ac.in/ \n", "2 USA [] http://www.caltech.edu/ \n", "3 IRL [] http://www.dcu.ie/ \n", "4 ITA [] http://www.ingv.it \n", "\n", " org_ids org_date1 org_date2 org_contact \n", "0 [https://ror.org/04pp8hn57] [] \n", "1 [https://ror.org/03m1xdc36] [] \n", "2 [https://ror.org/05dxps055] [] \n", "3 [https://ror.org/04a1a1e81] [] \n", "4 [https://ror.org/00qps9a02] [] " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_institutions = opendoar_df.explode('institution')[['opendoar_id', 'institution']]\n", "opendoar_institutions = opendoar_institutions[~opendoar_institutions.institution.isna()].reset_index(drop=True)\n", "opendoar_institutions = opendoar_institutions.join(pd.DataFrame(opendoar_institutions.institution.to_list(), columns=['org_name', 'org_other_names', \n", " 'org_country', 'org_3', 'org_noprofit', \n", " 'org_url', 'org_ids', 'org_date1', \n", " 'org_date2', 'org_contact']))\n", "opendoar_institutions['org_country'] = opendoar_institutions.org_country.map(str.upper, na_action='ignore')\n", "opendoar_institutions['org_country'] = opendoar_institutions.org_country.map(countrycode_iso2_to_countrycode_iso3, na_action='ignore')\n", 
"opendoar_institutions.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "opendoar_institutions['org_continent'] = opendoar_institutions.org_country.map(countrycode_to_continent)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([nan, 'UMI'], dtype=object)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_institutions[opendoar_institutions.org_continent.isna()].org_country.unique()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
opendoar_idinstitutionorg_nameorg_other_namesorg_countryorg_3org_noprofitorg_urlorg_idsorg_date1org_date2org_contactorg_continent
43495379[kettering university, [], um, [], , https://w...kettering university[]UMI[]https://www.kettering.edu[https://ror.org/03rcspa57][]NA
\n", "
" ], "text/plain": [ " opendoar_id institution \\\n", "4349 5379 [kettering university, [], um, [], , https://w... \n", "\n", " org_name org_other_names org_country org_3 org_noprofit \\\n", "4349 kettering university [] UMI [] \n", "\n", " org_url org_ids org_date1 \\\n", "4349 https://www.kettering.edu [https://ror.org/03rcspa57] \n", "\n", " org_date2 org_contact org_continent \n", "4349 [] NA " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_institutions.loc[opendoar_institutions.org_country == 'UMI', 'org_continent'] = 'NA'\n", "opendoar_institutions[opendoar_institutions.org_country == 'UMI']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**ROAR**" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "roar_df['location_country'] = roar_df.location_country.map(str.upper, na_action='ignore')\n", "roar_df['location_country'] = roar_df.location_country.map(countrycode_iso2_to_countrycode_iso3)\n", "roar_df['continent'] = roar_df.location_country.map(countrycode_to_continent)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**FAIRsharing**" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "fairsharing_df['subjects'] = fairsharing_df.subjects.str.split(pat=',')\n", "fairsharing_df['countries'] = fairsharing_df.countries.str.split(pat=',')\n", "\n", "fairsharing_countries = fairsharing_df.explode('countries')\n", "fairsharing_countries['countrycode'] = fairsharing_countries.countries.map(country_to_countrycode)\n", "fairsharing_countries['continent'] = fairsharing_countries.countrycode.map(countrycode_to_continent)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['European Union', 'Republic of Ireland', 'Worldwide', nan],\n", " dtype=object)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"fairsharing_countries[fairsharing_countries.countrycode.isna()].countries.unique()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['European Union', 'Republic of Ireland', 'Worldwide', 'Antarctica',\n", " nan], dtype=object)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_countries[fairsharing_countries.continent.isna()].countries.unique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Fix manually some rows" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "fairsharing_countries.loc[fairsharing_countries.countries == 'Republic of Ireland', ['countries', 'countrycode', 'continent']] = ['Ireland', 'IE', 'EU']\n", "fairsharing_countries.loc[fairsharing_countries.countries == 'European Union', ['countrycode', 'continent']] = ['EU', 'EU']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Make Antactica disappear (only one repo)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjectscountrycodecontinent
915Antabif IPT - AntOBIS IPT - GBIF BelgiumAntabif IPT - AntOBIS IPT - GBIF Belgiumhttps://fairsharing.org/10.25504/FAIRsharing.e...http://ipt.biodiversity.aq/Antarctica[Biodiversity, Life Science]AQNaN
\n", "
" ], "text/plain": [ " full_name \\\n", "915 Antabif IPT - AntOBIS IPT - GBIF Belgium \n", "\n", " short_name \\\n", "915 Antabif IPT - AntOBIS IPT - GBIF Belgium \n", "\n", " fs_url \\\n", "915 https://fairsharing.org/10.25504/FAIRsharing.e... \n", "\n", " url countries subjects \\\n", "915 http://ipt.biodiversity.aq/ Antarctica [Biodiversity, Life Science] \n", "\n", " countrycode continent \n", "915 AQ NaN " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_countries.loc[fairsharing_countries.countries == 'Antarctica', ['countrycode', 'continent']] = ['AQ', np.nan]\n", "fairsharing_countries[fairsharing_countries.countrycode == 'AQ']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Country coverage" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "name": "re3data", "type": "bar", "x": [ "USA", "DEU", "CAN", "GBR", "EEC", "AAA", "FRA", "AUS", "CHE", "JPN", "NLD", "ESP", "IND", "CHN", "ITA", "AUT", "NOR", "SWE", "BEL", "DNK", "POL", "RUS", "GRC", "ZAF", "CZE", "IRL", "MEX", "BRA", "TWN", "FIN", "PRT", "NZL", "EST", "KOR", "COL", "SRB", "LTU", "ARG", "SGP", "HUN", "ISR", "TUR", "SVN", "ISL", "KEN", "UKR", "ROU", "IDN", "LUX", "SVK", "HKG", "PER", "PAK", "LVA", "GRL", "THA", "CHL", "BEN", "PAN", "CMR", "CYP", "MKD", "SDN", "HRV", "BFA", "TUN", "CIV", "GHA", "SEN", "PYF", "PHL", "NCL", "NAM", "LBN", "KAZ", "FJI", "AZE", "LKA" ], "y": [ 2690, 1040, 573, 514, 349, 294, 233, 215, 118, 113, 111, 88, 79, 74, 62, 54, 52, 39, 39, 38, 33, 33, 31, 22, 21, 21, 18, 17, 16, 15, 15, 15, 14, 14, 13, 11, 11, 9, 9, 9, 8, 7, 7, 6, 6, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "openDOAR", "type": "bar", "visible": "legendonly", "x": [ "USA", "JPN", "GBR", "DEU", "ESP", "PER", "TUR", "IDN", "FRA", "BRA", 
"HRV", "ITA", "POL", "UKR", "CAN", "IND", "COL", "AUS", "NLD", "ARG", "NOR", "CHN", "PRT", "TWN", "MEX", "SWE", "RUS", "AUT", "HUN", "ZAF", "SRB", "KEN", "KOR", "GRC", "ECU", "BLR", "CHE", "BEL", "NGA", "IRL", "CHL", "CZE", "MYS", "FIN", "DZA", "NZL", "LTU", "IRN", "THA", "VEN", "LKA", "DNK", "BGD", "TZA", "CUB", "KAZ", "SVN", "MDA", "SDN", "UGA", "SAU", "NIC", "ZWE", "BGR", "CRI", "HKG", "SLV", "PHL", "URY", "EST", "EGY", "SGP", "PSE", "CYP", "JAM", "GHA", "ROU", "PAN", "LVA", "MKD", "ETH", "PAK", "SVK", "SEN", "HND", "ISL", "DOM", "LUX", "MMR", "ARE", "MAR", "BWA", "LBY", "GEO", "LBN", "ZMB", "IRQ", "ARM", "AZE", "RWA", "LSO", "MOZ", "PRY", "BIH", "TUN", "BOL", "FJI", "NAM", "CPV", "UMI", "VNM", "TTO", "AFG", "SOM", "QAT", "PRI", "NPL", "NCL", "MWI", "MLT", "LAO", "ALA", "KGZ", "GTM", "GLP", "CMR", "AND", "ALB", "KWT" ], "y": [ 910, 682, 315, 280, 175, 162, 161, 156, 154, 151, 148, 141, 124, 105, 99, 98, 97, 91, 75, 72, 67, 60, 60, 60, 50, 50, 48, 46, 44, 44, 44, 43, 41, 38, 37, 37, 35, 33, 30, 30, 27, 26, 25, 22, 20, 19, 18, 18, 17, 16, 16, 15, 15, 14, 14, 12, 12, 12, 12, 11, 11, 11, 11, 11, 10, 9, 9, 9, 9, 8, 8, 7, 7, 6, 6, 6, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "ROAR", "type": "bar", "visible": "legendonly", "x": [ "USA", "GBR", "DEU", "JPN", "ESP", "BRA", "TUR", "IDN", "PER", "POL", "IND", "COL", "UKR", "FRA", "CAN", "ITA", "CHN", "AUS", "TWN", "SWE", "RUS", "ARG", "PRT", "NOR", "KOR", "MEX", "ZAF", "NLD", "HUN", "MYS", "GRC", "SRB", "BEL", "BLR", "ECU", "CHE", "KEN", "CHL", "IRL", "AUT", "VEN", "FIN", "NZL", "ROU", "DNK", "CZE", "IRN", "MDA", "NGA", "DZA", "CUB", "THA", "SLV", "SDN", "PHL", "ZWE", "EGY", "LTU", "BGD", "BGR", "KAZ", "SVN", "HKG", "CRI", "SAU", "TZA", "CYP", "URY", "NIC", "UGA", "SGP", "HRV", "PSE", "GHA", "PAK", "EST", "AZE", "LVA", "DOM", "MAR", "BWA", "KGZ", "PAN", "JAM", "ETH", "BIH", "BOL", "LBN", "NPL", 
"MKD", "IRQ", "FJI", "SEN", "SVK", "LBY", "LKA", "LSO", "LUX", "ISL", "TUN", "DMA", "ARM", "MOZ", "NAM", "GEO", "UMI", "WSM", "ARE", "PRI", "SYR", "CMR", "SOM", "RWA", "QAT", "PRK", "HND", "MTQ", "MLT", "ALB", "ISR", "AFG" ], "y": [ 873, 258, 258, 234, 195, 181, 158, 151, 149, 126, 122, 119, 111, 101, 97, 96, 91, 82, 80, 76, 68, 65, 59, 56, 53, 48, 48, 46, 45, 41, 39, 38, 37, 35, 29, 29, 29, 27, 26, 26, 23, 23, 22, 17, 17, 16, 16, 16, 15, 14, 14, 13, 13, 13, 13, 12, 12, 11, 11, 11, 10, 10, 10, 9, 9, 8, 7, 7, 7, 7, 7, 7, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] }, { "name": "FAIRsharing", "type": "bar", "visible": "legendonly", "x": [ "USA", "GBR", "DEU", "FRA", "CHE", "CHN", "NLD", "ITA", "CAN", "BEL", "ESP", "JPN", "SWE", "CZE", "NOR", "DNK", "EU", "AUT", "FIN", "IE", "AUS", "ISR", "PRT", "HUN", "GRC", "MLT", "LTU", "ISL", "LUX", "SVK", "MNE", "HRV", "IND", "POL", "KOR", "SGP", "ZAF", "RUS", "NZL", "TWN", "MEX", "BRA", "SAU", "HKG", "ARG", "TUR", "BGR", "EST", "MAR", "ROU", "UGA", "CYP", "PAK", "THA", "CRI", "SLV", "TGO", "URY", "PAN", "AQ", "NIC", "NGA", "BEN", "CHL", "CMR", "COL", "EGY", "ETH", "FRO", "GRL", "HND", "IDN", "ARE", "KEN", "LVA", "MDG", "MLI", "MOZ", "MRT", "MWI", "NER", "ZWE" ], "y": [ 686, 248, 192, 162, 114, 99, 96, 91, 86, 83, 83, 80, 76, 71, 69, 67, 66, 64, 63, 62, 62, 61, 60, 59, 58, 53, 52, 52, 52, 52, 51, 51, 32, 11, 10, 10, 9, 9, 8, 8, 8, 8, 6, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", 
"linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, 
"#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, 
"ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": 
"left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Country coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data1 = re3data_institutions.groupby('org_country')[['re3data_id']].count().sort_values('re3data_id', ascending=False)\n", "data2 = opendoar_institutions.groupby('org_country')[['opendoar_id']].count().sort_values('opendoar_id', ascending=False)\n", "data3 = roar_df.groupby('location_country')[['eprintid']].count().sort_values('eprintid', ascending=False)\n", "data4 = fairsharing_countries.groupby('countrycode')[['url']].count().sort_values('url', ascending=False)\n", "\n", "plot = [\n", " go.Bar(\n", " x=data1.index,\n", " y=data1['re3data_id'],\n", " name='re3data'\n", " ),\n", " go.Bar(\n", " x=data2.index,\n", " y=data2['opendoar_id'],\n", " name='openDOAR',\n", " visible = 'legendonly'\n", " ),\n", " go.Bar(\n", " x=data3.index,\n", " y=data3['eprintid'],\n", " name='ROAR',\n", " visible = 'legendonly'\n", " ),\n", " go.Bar(\n", " x=data4.index,\n", " y=data4['url'],\n", " name='FAIRsharing',\n", " visible = 'legendonly'\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Country coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "\n", "go.Figure(plot, layout).show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Continental coverage" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "fill": "toself", "name": "re3data", "r": [ 41, 341, 2987, 3286, 233, 45 ], "theta": [ "AF", "AS", "EU", "NA", "OC", "SA" ], "type": "scatterpolar" }, { "fill": "toself", "name": "OpenDOAR", "r": [ 234, 1439, 2207, 1128, 113, 575 ], "theta": [ "AF", "AS", "EU", "NA", "OC", "SA" ], "type": "scatterpolar" }, { "fill": "toself", "name": "ROAR", "r": [ 190, 1069, 1877, 1078, 107, 603 ], "theta": [ "AF", "AS", "EU", "NA", "OC", "SA" ], "type": "scatterpolar" }, { "fill": "toself", "name": "FAIRsharing", "r": [ 27, 320, 2176, 
787, 70, 14 ], "theta": [ "AF", "AS", "EU", "NA", "OC", "SA" ], "type": "scatterpolar" } ], "layout": { "polar": { "radialaxis": { "visible": true } }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 
0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": 
"scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 
0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } } } }, 
"text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "data1 = re3data_institutions.groupby('org_continent')[['re3data_id']].count()\n", "data2 = opendoar_institutions.groupby('org_continent')[['opendoar_id']].count()\n", "data3 = roar_df.groupby('continent')[['eprintid']].count()\n", "data4 = fairsharing_countries.groupby('continent')[['url']].count()\n", "\n", "plot = [\n", " go.Scatterpolar(\n", " r=data1.re3data_id,\n", " theta=data1.index,\n", " fill='toself',\n", " name='re3data'),\n", " go.Scatterpolar(\n", " r=data2.opendoar_id,\n", " theta=data2.index,\n", " fill='toself',\n", " name='OpenDOAR'),\n", " go.Scatterpolar(\n", " r=data3.eprintid,\n", " theta=data3.index,\n", " fill='toself',\n", " name='ROAR'),\n", " go.Scatterpolar(\n", " r=data4.url,\n", " theta=data4.index,\n", " fill='toself',\n", " name='FAIRsharing')\n", "]\n", "\n", "layout = go.Layout(polar=dict(\n", " radialaxis=dict(\n", " visible=True\n", " ),\n", " )\n", ")\n", "\n", "go.Figure(plot, layout).show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }