{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | system_metadata.id | \n", "repository_metadata.name | \n", "repository_metadata.alternativename | \n", "repository_metadata.url | \n", "repository_metadata.description | \n", "repository_metadata.type | \n", "repository_metadata.content_languages | \n", "system_metadata.date_modified | \n", "system_metadata.date_created | \n", "repository_metadata.content_subjects | \n", "repository_metadata.content_types | \n", "organization | \n", "policy_urls | \n", "repository_metadata.software | \n", "repository_metadata.oai_url | \n", "system_metadata.publicly_visible | \n", "repository_metadata.repository_status | \n", "repository_metadata.fulltext_record_count | \n", "repository_metadata.metadata_record_count | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "175 | \n", "{\"name\": \"hku theses online\", \"language\": \"en\"} | \n", "[] | \n", "http://hub.hku.hk/handle/10722/1057 | \n", "this is an institutional repository providing ... | \n", "institutional | \n", "[\"zh\", \"en\"] | \n", "2021-03-25 10:16:18 | \n", "2005-12-21 12:44:08 | \n", "[\"multidisciplinary\"] | \n", "[bibliographic_references, theses_and_disserta... | \n", "[{'name': 'university of hong kong', 'alternat... | \n", "[] | \n", "{\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap... | \n", "NaN | \n", "yes | \n", "fully_functional | \n", "NaN | \n", "11850.0 | \n", "
1 | \n", "64 | \n", "{\"name\": \"research support scheme - central eu... | \n", "[] | \n", "http://rss.archives.ceu.hu/ | \n", "this is an institutional repository collecting... | \n", "institutional | \n", "[\"cs\", \"en\", \"hu\", \"ru\"] | \n", "2021-03-25 09:48:31 | \n", "2006-01-04 14:59:30 | \n", "[\"multidisciplinary\"] | \n", "[unpub_reports_and_working_papers] | \n", "[{'name': 'central european university', 'alte... | \n", "[] | \n", "{\"name\": \"eprints\", \"version\": \"2.2.1\"} | \n", "http://rss.archives.ceu.hu/perl/oai2 | \n", "yes | \n", "fully_functional | \n", "NaN | \n", "164.0 | \n", "
2 | \n", "151 | \n", "{\"name\": \"cadmus, eui research repository\", \"l... | \n", "[] | \n", "http://cadmus.eui.eu/ | \n", "cadmus is the name of the eui research reposit... | \n", "institutional | \n", "[\"nl\", \"en\", \"fr\", \"de\", \"it\"] | \n", "2021-09-13 13:35:36 | \n", "2006-01-04 12:07:07 | \n", "[\"history and archaeology\", \"multidisciplinary... | \n", "[journal_articles, theses_and_dissertations, u... | \n", "[{'name': 'european university institute', 'al... | \n", "[{\"policy_url\": \"https://www.eui.eu/research/e... | \n", "{\"name\": \"dspace\", \"version\": \"5.2\"} | \n", "http://cadmus.eui.eu/oai/request | \n", "yes | \n", "fully_functional | \n", "3867.0 | \n", "24869.0 | \n", "
3 | \n", "105 | \n", "{\"name\": \"document server@uhasselt\", \"language... | \n", "[] | \n", "https://doclib.uhasselt.be/dspace/ | \n", "this site is a university repository providing... | \n", "institutional | \n", "[\"nl\", \"en\", \"fr\", \"de\"] | \n", "2021-04-16 15:23:52 | \n", "2006-01-24 15:46:44 | \n", "[\"multidisciplinary\"] | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[{'name': 'uhasselt', 'alternativeName': 'hass... | \n", "[] | \n", "{\"name\": \"dspace\", \"version\": \"1.7.2\"} | \n", "http://doclib.uhasselt.be/dspace-oai/request | \n", "yes | \n", "fully_functional | \n", "0.0 | \n", "27376.0 | \n", "
4 | \n", "101 | \n", "{\"name\": \"utrecht university repository\", \"lan... | \n", "[] | \n", "http://dspace.library.uu.nl | \n", "this site is a university repository providing... | \n", "institutional | \n", "[\"nl\", \"en\"] | \n", "2021-04-16 15:22:03 | \n", "2006-01-13 12:55:13 | \n", "[\"multidisciplinary\"] | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[{'name': 'university of utrecht', 'alternativ... | \n", "[] | \n", "{\"name\": \"dspace\", \"version\": \"\"} | \n", "https://dspace.library.uu.nl/oai/request | \n", "yes | \n", "fully_functional | \n", "1686.0 | \n", "185637.0 | \n", "
\n", " | system_metadata.id | \n", "repository_metadata.name | \n", "repository_metadata.alternativename | \n", "repository_metadata.url | \n", "repository_metadata.description | \n", "repository_metadata.type | \n", "repository_metadata.content_languages | \n", "system_metadata.date_modified | \n", "system_metadata.date_created | \n", "repository_metadata.content_subjects | \n", "repository_metadata.content_types | \n", "organization | \n", "policy_urls | \n", "repository_metadata.software | \n", "repository_metadata.oai_url | \n", "system_metadata.publicly_visible | \n", "repository_metadata.repository_status | \n", "repository_metadata.fulltext_record_count | \n", "repository_metadata.metadata_record_count | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "5742 | \n", "5742 | \n", "2147 | \n", "5742 | \n", "5421 | \n", "5742 | \n", "5742 | \n", "5742 | \n", "5742 | \n", "5742 | \n", "5598 | \n", "5742 | \n", "5742 | \n", "5742 | \n", "4402 | \n", "5742 | \n", "5595 | \n", "2.299000e+03 | \n", "4.197000e+03 | \n", "
unique | \n", "5742 | \n", "5713 | \n", "2107 | \n", "5705 | \n", "4619 | \n", "4 | \n", "330 | \n", "2372 | \n", "5573 | \n", "821 | \n", "477 | \n", "5201 | \n", "642 | \n", "321 | \n", "4370 | \n", "1 | \n", "7 | \n", "NaN | \n", "NaN | \n", "
top | \n", "175 | \n", "{\"name\": \"hiroshima associated repository port... | \n", "[{'acronym': 'aura'}] | \n", "http://harp.lib.hiroshima-u.ac.jp/ | \n", "this site provides access to the research outp... | \n", "institutional | \n", "[\"en\"] | \n", "2020-09-18 12:53:48 | \n", "2020-09-18 12:53:48 | \n", "[\"multidisciplinary\"] | \n", "[theses_and_dissertations] | \n", "[{'name': 'rijksuniversiteit groningen', 'alte... | \n", "[] | \n", "{\"name\": \"dspace\", \"version\": \"\"} | \n", "https://kidoks.bsz-bw.de/oai | \n", "yes | \n", "fully_functional | \n", "NaN | \n", "NaN | \n", "
freq | \n", "1 | \n", "3 | \n", "4 | \n", "3 | \n", "95 | \n", "5096 | \n", "1917 | \n", "82 | \n", "82 | \n", "3227 | \n", "465 | \n", "26 | \n", "5098 | \n", "822 | \n", "3 | \n", "5742 | \n", "5276 | \n", "NaN | \n", "NaN | \n", "
mean | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "5.010186e+03 | \n", "1.760546e+05 | \n", "
std | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.206295e+04 | \n", "6.600825e+06 | \n", "
min | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000e+00 | \n", "0.000000e+00 | \n", "
25% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.000000e+00 | \n", "8.950000e+02 | \n", "
50% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "4.220000e+02 | \n", "4.026000e+03 | \n", "
75% | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2.930500e+03 | \n", "1.630400e+04 | \n", "
max | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "1.817531e+06 | \n", "4.200000e+08 | \n", "