{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | openaire_id | \n", "opendoar_id | \n", "repository_name | \n", "additional_name | \n", "repository_url | \n", "description | \n", "type | \n", "update_date | \n", "start_date | \n", "subject | \n", "content_type | \n", "institution | \n", "metadata_policy | \n", "data_policy | \n", "submission_policy | \n", "content_policy | \n", "software | \n", "api | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "opendoar____::38b3eff8baf56627478ec76a704e9b52 | \n", "101 | \n", "utrecht university repository | \n", "[] | \n", "http://dspace.library.uu.nl | \n", "this site is a university repository providing... | \n", "institutional | \n", "2021-04-16 15:22:03 | \n", "2006-01-13 12:55:13 | \n", "[multidisciplinary] | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[[university of utrecht, [universiteit utrecht... | \n", "True | \n", "True | \n", "False | \n", "True | \n", "dspace | \n", "true | \n", "
1 | \n", "opendoar____::2b44928ae11fb9384c4cf38708677c48 | \n", "115 | \n", "dspace at indian institute of management kozhi... | \n", "[dspace@iimk] | \n", "http://dspace.iimk.ac.in/ | \n", "this site is a subject based university reposi... | \n", "institutional | \n", "2021-02-18 17:36:43 | \n", "2006-01-04 11:54:34 | \n", "[ecology and environment, social sciences gene... | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[[indian institute of management kozhikode, [i... | \n", "True | \n", "True | \n", "True | \n", "True | \n", "dspace 4.1 | \n", "true | \n", "
2 | \n", "opendoar____::3416a75f4cea9109507cacd8e2f2aefc | \n", "41 | \n", "caltech engineering and science online | \n", "[] | \n", "http://calteches.library.caltech.edu/ | \n", "the caltech archives holds approximately 220 c... | \n", "institutional | \n", "2021-02-18 17:36:28 | \n", "2006-01-04 14:47:04 | \n", "[biology and biochemistry, chemistry and chemi... | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[[california institute of technology, [caltech... | \n", "True | \n", "True | \n", "True | \n", "True | \n", "eprints 3.1.3 | \n", "true | \n", "
3 | \n", "opendoar____::07e1cd7dca89a1678042477183b7ac3f | \n", "119 | \n", "dcu online research access service | \n", "[doras] | \n", "http://doras.dcu.ie/ | \n", "this site is an institutional repository provi... | \n", "institutional | \n", "2021-02-18 17:36:44 | \n", "2006-01-04 11:15:19 | \n", "[multidisciplinary] | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[[dublin city university, [dcu], ie, [], , htt... | \n", "True | \n", "True | \n", "True | \n", "True | \n", "eprints 3.0.5 | \n", "true | \n", "
4 | \n", "opendoar____::d1f491a404d6854880943e5c3cd9ca25 | \n", "129 | \n", "earth-prints repository | \n", "[] | \n", "http://www.earth-prints.org/ | \n", "a subject based repository providing open acce... | \n", "disciplinary | \n", "2021-04-19 08:28:38 | \n", "2006-01-30 16:43:11 | \n", "[earth and planetary sciences] | \n", "[journal_articles, conference_and_workshop_pap... | \n", "[[istituto nazionale di geofisica e vulcanolog... | \n", "True | \n", "True | \n", "True | \n", "True | \n", "dspace 5.8.1-snapshot | \n", "true | \n", "
\n", " | openaire_id | \n", "opendoar_id | \n", "repository_name | \n", "additional_name | \n", "repository_url | \n", "description | \n", "type | \n", "update_date | \n", "start_date | \n", "subject | \n", "content_type | \n", "institution | \n", "metadata_policy | \n", "data_policy | \n", "submission_policy | \n", "content_policy | \n", "software | \n", "api | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "5707 | \n", "5707.000000 | \n", "5707 | \n", "2138 | \n", "5707 | \n", "5425 | \n", "5707 | \n", "5707 | \n", "5707 | \n", "5542 | \n", "5563 | \n", "5707 | \n", "5707 | \n", "5707 | \n", "5707 | \n", "5707 | \n", "5707 | \n", "5707 | \n", "
unique | \n", "5707 | \n", "NaN | \n", "5670 | \n", "2096 | \n", "5670 | \n", "4622 | \n", "4 | \n", "2501 | \n", "5538 | \n", "819 | \n", "476 | \n", "5098 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "321 | \n", "2 | \n", "
top | \n", "opendoar____::3cf166c6b73f030b4f67eeaeba301103 | \n", "NaN | \n", "hiroshima associated repository portal | \n", "[] | \n", "http://harp.lib.hiroshima-u.ac.jp/ | \n", "this site provides access to the research outp... | \n", "institutional | \n", "2020-09-18 12:53:48 | \n", "2020-09-18 12:53:48 | \n", "[multidisciplinary] | \n", "[theses_and_dissertations] | \n", "[[rijksuniversiteit groningen, [rug], nl, [], ... | \n", "False | \n", "False | \n", "False | \n", "False | \n", "dspace | \n", "true | \n", "
freq | \n", "1 | \n", "NaN | \n", "3 | \n", "4 | \n", "3 | \n", "95 | \n", "5067 | \n", "82 | \n", "82 | \n", "3212 | \n", "460 | \n", "26 | \n", "4116 | \n", "4101 | \n", "5016 | \n", "4075 | \n", "800 | \n", "4374 | \n", "
mean | \n", "NaN | \n", "4008.118801 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
std | \n", "NaN | \n", "2869.948770 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
min | \n", "NaN | \n", "2.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
25% | \n", "NaN | \n", "1823.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
50% | \n", "NaN | \n", "3361.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
75% | \n", "NaN | \n", "5095.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
max | \n", "NaN | \n", "10175.000000 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "