{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | system_metadata.id | \n", "repository_metadata.name | \n", "repository_metadata.alternativename | \n", "repository_metadata.url | \n", "repository_metadata.description | \n", "repository_metadata.type | \n", "repository_metadata.content_languages | \n", "system_metadata.date_modified | \n", "system_metadata.date_created | \n", "repository_metadata.content_subjects | \n", "repository_metadata.content_types | \n", "organization | \n", "policy_urls | \n", "repository_metadata.software | \n", "repository_metadata.oai_url | \n", "system_metadata.publicly_visible | \n", "repository_metadata.repository_status | \n", "repository_metadata.fulltext_record_count | \n", "repository_metadata.metadata_record_count | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "134 | \n", "{\"name\": \"eldorado - repository of the tu dort... | \n", "[{'name': 'eldorado - ressourcen aus und für l... | \n", "https://eldorado.tu-dortmund.de | \n", "NaN | \n", "institutional | \n", "[] | \n", "2022-01-12 15:34:54 | \n", "2005-12-19 14:57:52 | \n", "[arts, humanities, science, mathematics, socia... | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[{'name': 'technische universität dortmund', '... | \n", "[] | \n", "{\"name\": \"dspace\", \"version\": \"\"} | \n", "https://eldorado.tu-dortmund.de/oai/request | \n", "yes | \n", "NaN | \n", "9629.0 | \n", "20963.0 | \n", "
1 | \n", "58 | \n", "{\"name\": \"archive ouverte en sciences de linfo... | \n", "[{'acronym': '@rchivesic'}] | \n", "https://archivesic.ccsd.cnrs.fr | \n", "NaN | \n", "institutional | \n", "[] | \n", "2022-01-12 15:34:53 | \n", "2006-01-13 12:48:32 | \n", "[arts, science, technology, engineering, mathe... | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[{'name': 'centre pour la communication scient... | \n", "[] | \n", "{\"name\": \"hal\", \"version\": \"\"} | \n", "https://api.archives-ouvertes.fr/oai/archivesic | \n", "yes | \n", "NaN | \n", "55492.0 | \n", "1137498.0 | \n", "
2 | \n", "93 | \n", "{\"name\": \"digitalcommons@the texas medical cen... | \n", "[] | \n", "http://digitalcommons.library.tmc.edu/ | \n", "NaN | \n", "institutional | \n", "[] | \n", "2022-01-12 15:34:53 | \n", "2006-02-14 11:16:12 | \n", "[health and medicine] | \n", "[journal_articles, theses_and_dissertations] | \n", "[{'name': 'texas medical center', 'alternative... | \n", "[] | \n", "{\"name\": \"other\", \"version\": \"\"} | \n", "http://digitalcommons.library.tmc.edu/do/oai/ | \n", "yes | \n", "NaN | \n", "2658.0 | \n", "7268.0 | \n", "
3 | \n", "68 | \n", "{\"name\": \"cognitive sciences eprint archive\", ... | \n", "[{'acronym': 'cogprints'}] | \n", "http://cogprints.org/ | \n", "NaN | \n", "disciplinary | \n", "[] | \n", "2022-01-12 15:34:53 | \n", "2006-01-04 15:01:23 | \n", "[humanities, health and medicine, science, soc... | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[{'name': 'university of southampton', 'altern... | \n", "[] | \n", "{\"name\": \"eprints\", \"version\": \"\"} | \n", "http://cogprints.org/cgi/oai2 | \n", "yes | \n", "NaN | \n", "2895.0 | \n", "4277.0 | \n", "
4 | \n", "84 | \n", "{\"name\": \"digital commons@carleton college\", \"... | \n", "[] | \n", "http://digitalcommons.carleton.edu/ | \n", "NaN | \n", "institutional | \n", "[] | \n", "2022-01-12 15:34:53 | \n", "2006-01-04 16:07:58 | \n", "[humanities, science, social sciences] | \n", "[journal_articles, unpub_reports_and_working_p... | \n", "[{'name': 'carleton college', 'alternativeName... | \n", "[] | \n", "{\"name\": \"other\", \"version\": \"\"} | \n", "NaN | \n", "yes | \n", "NaN | \n", "NaN | \n", "42.0 | \n", "
\n", " | system_metadata.id | \n", "repository_metadata.name | \n", "repository_metadata.alternativename | \n", "repository_metadata.url | \n", "repository_metadata.description | \n", "repository_metadata.type | \n", "repository_metadata.content_languages | \n", "system_metadata.date_modified | \n", "system_metadata.date_created | \n", "repository_metadata.content_subjects | \n", "repository_metadata.content_types | \n", "organization | \n", "policy_urls | \n", "repository_metadata.software | \n", "repository_metadata.oai_url | \n", "system_metadata.publicly_visible | \n", "repository_metadata.repository_status | \n", "repository_metadata.fulltext_record_count | \n", "repository_metadata.metadata_record_count | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "5811 | \n", "5811 | \n", "2155 | \n", "5810 | \n", "0.0 | \n", "5810 | \n", "5811 | \n", "5811 | \n", "5811 | \n", "5644 | \n", "5667 | \n", "5811 | \n", "5811 | \n", "5811 | \n", "4447 | \n", "5811 | \n", "0.0 | \n", "2.292000e+03 | \n", "4.184000e+03 | \n", "
unique | \n", "5811 | \n", "5780 | \n", "2115 | \n", "5772 | \n", "NaN | \n", "4 | \n", "1 | \n", "171 | \n", "5643 | \n", "236 | \n", "476 | \n", "5212 | \n", "678 | \n", "32 | \n", "4415 | \n", "1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
top | \n", "134 | \n", "{\"name\": \"arch\", \"language\": \"en\"} | \n", "[{'acronym': 'aura'}] | \n", "http://harp.lib.hiroshima-u.ac.jp/ | \n", "NaN | \n", "institutional | \n", "[] | \n", "2022-01-12 15:35:47 | \n", "2020-09-18 12:53:48 | \n", "[science, technology, engineering, mathematics... | \n", "[theses_and_dissertations] | \n", "[{'name': 'rijksuniversiteit groningen', 'alte... | \n", "[] | \n", "{\"name\": \"dspace\", \"version\": \"\"} | \n", "https://api.figshare.com/v2/oai | \n", "yes | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
freq | \n", "1 | \n", "3 | \n", "4 | \n", "3 | \n", "NaN | \n", "5161 | \n", "5811 | \n", "73 | \n", "81 | \n", "3321 | \n", "469 | \n", "26 | \n", "5131 | \n", "2273 | \n", "3 | \n", "5811 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
mean | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "5.022890e+03 | \n", "1.765556e+05 | \n", "
std | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.212648e+04 | \n", "6.611068e+06 | \n", "
min | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000e+00 | \n", "0.000000e+00 | \n", "
25% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000e+00 | \n", "8.937500e+02 | \n", "
50% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.225000e+02 | \n", "4.012500e+03 | \n", "
75% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2.931500e+03 | \n", "1.629350e+04 | \n", "
max | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.817531e+06 | \n", "4.200000e+08 | \n", "