registries_analysis/notebooks/01.3-exploration-roar.ipynb

5153 lines
212 KiB
Plaintext
Raw Normal View History

2021-07-22 11:35:40 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading datasets"
]
},
{
"cell_type": "code",
2021-07-23 12:38:56 +02:00
"execution_count": 2,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-07-23 12:38:56 +02:00
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
2021-07-22 11:35:40 +02:00
" <th>type</th>\n",
2021-07-23 12:38:56 +02:00
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2021-07-23 12:38:56 +02:00
" <td>921</td>\n",
" <td>517</td>\n",
" <td>archive</td>\n",
2021-07-22 11:35:40 +02:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>disk0/00/00/09/21</td>\n",
" <td>2010-01-06 13:44:55</td>\n",
" <td>2016-04-17 21:54:11</td>\n",
" <td>2010-01-06 13:44:55</td>\n",
" <td>theses</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>show</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>FALSE</td>\n",
" <td>http://alcme.oclc.org/ndltd/index.html</td>\n",
" <td>Networked Digital Library of Theses and Disser...</td>\n",
" <td>http://alcme.oclc.org/ndltd/servlet/OAIHandler</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>us</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>etddb</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2006-04-18</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1533400</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1489</td>\n",
2021-07-23 12:38:56 +02:00
" <td>556</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/14/89</td>\n",
" <td>2010-01-06 13:46:05</td>\n",
" <td>2016-04-17 21:49:00</td>\n",
" <td>2010-01-06 13:46:05</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>http://prensahistorica.mcu.es/prensahistorica/...</td>\n",
" <td>Virtual Library of Historical Press</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://prensahistorica.mcu.es/prensahistorica/...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The Virtual Library of Historical Press is the...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>es</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>geoname_2_ES</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2006-04-04 00:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1008486</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>858</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>47500</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>606</td>\n",
2021-07-23 12:38:56 +02:00
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>fr</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opendoar</td>\n",
" <td>166</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>1106</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roarmap</td>\n",
" <td>69</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source dir \\\n",
"0 921 517 archive 1 NaN NaN disk0/00/00/09/21 \n",
"1 1489 556 archive 1 NaN NaN disk0/00/00/14/89 \n",
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
"3 606 NaN NaN NaN NaN NaN NaN \n",
"4 606 NaN NaN NaN NaN NaN NaN \n",
"\n",
" datestamp lastmod status_changed type \\\n",
"0 2010-01-06 13:44:55 2016-04-17 21:54:11 2010-01-06 13:44:55 theses \n",
"1 2010-01-06 13:46:05 2016-04-17 21:49:00 2010-01-06 13:46:05 other \n",
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
"0 NaN NaN show NaN NaN NaN \n",
"1 NaN NaN show NaN NaN NaN \n",
"2 NaN NaN show NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"0 NaN NaN 0 \n",
"1 NaN NaN 0 \n",
"2 NaN NaN 0 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar \\\n",
"0 NaN NaN FALSE \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" home_page \\\n",
"0 http://alcme.oclc.org/ndltd/index.html \n",
"1 http://prensahistorica.mcu.es/prensahistorica/... \n",
"2 http://hal.archives-ouvertes.fr/ \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" title \\\n",
"0 Networked Digital Library of Theses and Disser... \n",
"1 Virtual Library of Historical Press \n",
"2 HAL: Hyper Article en Ligne \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" oai_pmh sword_endpoint rss_feed \\\n",
"0 http://alcme.oclc.org/ndltd/servlet/OAIHandler NaN NaN \n",
"1 http://prensahistorica.mcu.es/prensahistorica/... NaN NaN \n",
"2 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" twitter_feed description fulltext \\\n",
"0 NaN NaN TRUE \n",
"1 NaN The Virtual Library of Historical Press is the... TRUE \n",
"2 NaN NaN TRUE \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" open_access mandate organisation_title organisation_home_page \\\n",
"0 TRUE NaN NaN NaN \n",
"1 TRUE NaN NaN NaN \n",
"2 TRUE NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" location_country location_city location_latitude location_longitude \\\n",
"0 us NaN NaN NaN \n",
"1 es NaN NaN NaN \n",
"2 fr NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" software geoname version subjects date note \\\n",
"0 etddb geoname_2_US other NaN 2006-04-18 NaN \n",
"1 other geoname_2_ES other NaN 2006-04-04 00:00:00 NaN \n",
"2 hal geoname_2_FR other NaN 1998-11-02 11:53:57 NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" suggestions activity_low activity_medium activity_high recordcount \\\n",
"0 NaN 0 0 0 1533400 \n",
"1 NaN 0 0 0 1008486 \n",
"2 NaN 0 0 2 675816 \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5... NaN \n",
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761... NaN \n",
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
"0 NaN NaN NaN celestial 5 \n",
"1 NaN NaN NaN celestial 858 \n",
"2 NaN NaN NaN opendoar 166 \n",
"3 NaN NaN NaN celestial 1106 \n",
"4 NaN NaN NaN roarmap 69 \n",
"\n",
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN 1 \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 6 1 1 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" webometrics_scholar monthly_deposits \\\n",
"0 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"1 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" total_deposits association \n",
"0 0 NaN \n",
"1 47500 NaN \n",
"2 3063 NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n",
"roar_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['eprintid', 'rev_number', 'eprint_status', 'userid', 'importid',\n",
" 'source', 'dir', 'datestamp', 'lastmod', 'status_changed', 'type',\n",
" 'succeeds', 'commentary', 'metadata_visibility', 'latitude',\n",
" 'longitude', 'relation_type', 'relation_uri', 'item_issues_id',\n",
" 'item_issues_type', 'item_issues_description', 'item_issues_timestamp',\n",
" 'item_issues_status', 'item_issues_reported_by',\n",
" 'item_issues_resolved_by', 'item_issues_comment', 'item_issues_count',\n",
" 'sword_depositor', 'sword_slug', 'exemplar', 'home_page', 'title',\n",
" 'oai_pmh', 'sword_endpoint', 'rss_feed', 'twitter_feed', 'description',\n",
" 'fulltext', 'open_access', 'mandate', 'organisation_title',\n",
" 'organisation_home_page', 'location_country', 'location_city',\n",
" 'location_latitude', 'location_longitude', 'software', 'geoname',\n",
" 'version', 'subjects', 'date', 'note', 'suggestions', 'activity_low',\n",
" 'activity_medium', 'activity_high', 'recordcount', 'recordhistory',\n",
" 'fulltexts_total', 'fulltexts_docs', 'fulltexts_rtotal',\n",
" 'fulltexts_rdocs', 'registry_name', 'registry_id', 'submit_to',\n",
" 'submitted_to_name', 'submitted_to_done', 'webometrics_rank',\n",
" 'webometrics_size', 'webometrics_visibility', 'webometrics_rich_files',\n",
" 'webometrics_scholar', 'monthly_deposits', 'total_deposits',\n",
" 'association'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
2021-07-23 12:38:56 +02:00
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
2021-07-22 11:35:40 +02:00
" <th>home_page</th>\n",
" <th>title</th>\n",
2021-07-23 12:38:56 +02:00
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
2021-07-22 11:35:40 +02:00
" <th>location_country</th>\n",
2021-07-23 12:38:56 +02:00
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
2021-07-22 11:35:40 +02:00
" <th>subjects</th>\n",
2021-07-23 12:38:56 +02:00
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-07-23 12:38:56 +02:00
" <th>count</th>\n",
2022-02-14 13:34:42 +01:00
" <td>18079</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>108</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2242</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>268</td>\n",
" <td>5437</td>\n",
" <td>5442</td>\n",
" <td>4567</td>\n",
" <td>178</td>\n",
" <td>1538</td>\n",
" <td>116</td>\n",
" <td>3837</td>\n",
" <td>4197</td>\n",
" <td>4197</td>\n",
" <td>3746</td>\n",
" <td>5253</td>\n",
" <td>4965</td>\n",
" <td>5268</td>\n",
" <td>3839</td>\n",
" <td>3798</td>\n",
" <td>3780</td>\n",
" <td>4700</td>\n",
" <td>4730</td>\n",
" <td>5444</td>\n",
" <td>10941</td>\n",
" <td>5429</td>\n",
" <td>218</td>\n",
" <td>189</td>\n",
" <td>2288</td>\n",
" <td>2288</td>\n",
" <td>2288</td>\n",
" <td>2290</td>\n",
" <td>2288</td>\n",
2021-07-23 12:38:56 +02:00
" <td>270</td>\n",
" <td>258</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
2022-02-14 13:34:42 +01:00
" <td>7393</td>\n",
" <td>7258</td>\n",
" <td>730</td>\n",
2021-07-23 12:38:56 +02:00
" <td>205</td>\n",
" <td>205</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>756</td>\n",
" <td>756</td>\n",
2022-02-14 13:34:42 +01:00
" <td>237</td>\n",
2021-07-23 12:38:56 +02:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>660</td>\n",
2021-07-23 12:38:56 +02:00
" <td>1</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2189</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>4198</td>\n",
" <td>4043</td>\n",
" <td>4230</td>\n",
2021-07-23 12:38:56 +02:00
" <td>12</td>\n",
2022-02-14 13:34:42 +01:00
" <td>108</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>51</td>\n",
" <td>3</td>\n",
" <td>68</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5271</td>\n",
" <td>5143</td>\n",
" <td>4278</td>\n",
" <td>172</td>\n",
" <td>1485</td>\n",
" <td>112</td>\n",
" <td>3359</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>4498</td>\n",
" <td>4395</td>\n",
2021-07-23 12:38:56 +02:00
" <td>136</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1896</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2965</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3003</td>\n",
2021-07-23 12:38:56 +02:00
" <td>31</td>\n",
" <td>126</td>\n",
" <td>53</td>\n",
" <td>237</td>\n",
2022-02-14 13:34:42 +01:00
" <td>4898</td>\n",
" <td>210</td>\n",
" <td>173</td>\n",
2021-07-23 12:38:56 +02:00
" <td>72</td>\n",
" <td>54</td>\n",
" <td>16</td>\n",
" <td>741</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1702</td>\n",
2021-07-23 12:38:56 +02:00
" <td>135</td>\n",
" <td>118</td>\n",
" <td>134</td>\n",
" <td>117</td>\n",
" <td>3</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5165</td>\n",
2021-07-23 12:38:56 +02:00
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>146</td>\n",
" <td>143</td>\n",
" <td>346</td>\n",
" <td>342</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2022-02-14 13:34:42 +01:00
" <td>12118</td>\n",
2021-07-23 12:38:56 +02:00
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/09/21</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
" <td>2016-04-17 21:55:19</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
" <td>3164</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>bad_oai_pmh_url_0</td>\n",
" <td>duplicate_title</td>\n",
" <td>Similar title to &lt;xhtml:table xmlns:xhtml=\"htt...</td>\n",
" <td>2010-01-13 10:44:49</td>\n",
" <td>discovered</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FALSE</td>\n",
" <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>http://virtuelcampus.univ-msila.dz/fll</td>\n",
2022-02-14 13:34:42 +01:00
" <td>http://producao.usp.br/sword/servicedocument</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
" <td>http://twitter.com/bu_ufsc</td>\n",
2021-07-23 12:38:56 +02:00
" <td>info:other:archives.eprints.org:import</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>Chinese Academy of Science (中国科学院)</td>\n",
" <td>http://www.cas.cn/</td>\n",
" <td>us</td>\n",
" <td>Lima</td>\n",
" <td>34.1607</td>\n",
" <td>-118.139</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>L1</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
2021-07-26 11:15:14 +02:00
" <td>¿Quién puede depositar documentos en el reposi...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>celestial</td>\n",
" <td>184</td>\n",
" <td>celestial</td>\n",
" <td>opendoar</td>\n",
" <td>2021-01-25</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
2021-07-23 12:38:56 +02:00
" <td>824</td>\n",
" <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>russell_group</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>238</td>\n",
2022-02-14 13:34:42 +01:00
" <td>333</td>\n",
" <td>5444</td>\n",
" <td>1330</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>16</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3853</td>\n",
2021-07-23 12:38:56 +02:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5402</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>18</td>\n",
" <td>47</td>\n",
" <td>3</td>\n",
" <td>68</td>\n",
" <td>53</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2201</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>261</td>\n",
2021-07-23 12:38:56 +02:00
" <td>4</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>112</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2805</td>\n",
" <td>2696</td>\n",
" <td>2748</td>\n",
2021-07-23 12:38:56 +02:00
" <td>9</td>\n",
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>907</td>\n",
" <td>76</td>\n",
2021-07-23 12:38:56 +02:00
" <td>25</td>\n",
" <td>25</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2341</td>\n",
" <td>845</td>\n",
" <td>4841</td>\n",
" <td>362</td>\n",
2021-07-23 12:38:56 +02:00
" <td>99</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2012</td>\n",
" <td>2074</td>\n",
" <td>2210</td>\n",
" <td>730</td>\n",
2021-07-23 12:38:56 +02:00
" <td>95</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3741</td>\n",
2021-07-23 12:38:56 +02:00
" <td>7</td>\n",
2022-02-14 13:34:42 +01:00
" <td>283</td>\n",
2021-07-23 12:38:56 +02:00
" <td>205</td>\n",
" <td>205</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>387</td>\n",
" <td>387</td>\n",
2022-02-14 13:34:42 +01:00
" <td>144</td>\n",
2021-07-23 12:38:56 +02:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
2022-02-14 13:34:42 +01:00
"count 18079 5444 5444 5444 0 0 \n",
"unique 5444 660 1 2189 0 0 \n",
"top 12118 11 archive 1 NaN NaN \n",
"freq 238 333 5444 1330 NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" dir datestamp lastmod \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 5444 \n",
"unique 5444 4198 4043 \n",
"top disk0/00/00/09/21 2010-01-06 13:43:48 2016-04-17 21:55:19 \n",
2021-07-23 12:38:56 +02:00
"freq 1 16 8 \n",
"\n",
" status_changed type succeeds commentary \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 108 0 \n",
"unique 4230 12 108 0 \n",
"top 2010-01-06 13:43:48 institutional 3164 NaN \n",
2022-02-14 13:34:42 +01:00
"freq 16 3853 1 NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 0 0 0 0 \n",
"unique 2 0 0 0 0 \n",
"top show NaN NaN NaN NaN \n",
2022-02-14 13:34:42 +01:00
"freq 5402 NaN NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_id item_issues_type \\\n",
"count 86 86 \n",
"unique 51 3 \n",
"top bad_oai_pmh_url_0 duplicate_title \n",
"freq 18 47 \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_description \\\n",
"count 86 \n",
"unique 68 \n",
"top Similar title to <xhtml:table xmlns:xhtml=\"htt... \n",
"freq 3 \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"count 86 86 0 \n",
"unique 3 2 0 \n",
"top 2010-01-13 10:44:49 discovered NaN \n",
"freq 68 53 NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
2022-02-14 13:34:42 +01:00
"count 0 0 2242 \n",
"unique 0 0 4 \n",
"top NaN NaN 0 \n",
2022-02-14 13:34:42 +01:00
"freq NaN NaN 2201 \n",
2021-07-23 12:38:56 +02:00
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
2022-02-14 13:34:42 +01:00
"count 0 0 268 5437 \n",
"unique 0 0 2 5271 \n",
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
2022-02-14 13:34:42 +01:00
"freq NaN NaN 261 4 \n",
2021-07-23 12:38:56 +02:00
"\n",
" title oai_pmh \\\n",
2022-02-14 13:34:42 +01:00
"count 5442 4567 \n",
"unique 5143 4278 \n",
"top Repositorio Institucional http://virtuelcampus.univ-msila.dz/fll \n",
"freq 7 5 \n",
2021-07-23 12:38:56 +02:00
"\n",
2022-02-14 13:34:42 +01:00
" sword_endpoint \\\n",
"count 178 \n",
"unique 172 \n",
"top http://producao.usp.br/sword/servicedocument \n",
"freq 2 \n",
2021-07-23 12:38:56 +02:00
"\n",
" rss_feed \\\n",
2022-02-14 13:34:42 +01:00
"count 1538 \n",
"unique 1485 \n",
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
"freq 5 \n",
2021-07-23 12:38:56 +02:00
"\n",
" twitter_feed description \\\n",
2022-02-14 13:34:42 +01:00
"count 116 3837 \n",
"unique 112 3359 \n",
"top http://twitter.com/bu_ufsc info:other:archives.eprints.org:import \n",
"freq 2 112 \n",
2021-07-23 12:38:56 +02:00
"\n",
" fulltext open_access mandate organisation_title \\\n",
2022-02-14 13:34:42 +01:00
"count 4197 4197 3746 5253 \n",
"unique 2 2 2 4498 \n",
"top TRUE TRUE FALSE Chinese Academy of Science (中国科学院) \n",
2022-02-14 13:34:42 +01:00
"freq 2805 2696 2748 9 \n",
2021-07-23 12:38:56 +02:00
"\n",
" organisation_home_page location_country location_city \\\n",
2022-02-14 13:34:42 +01:00
"count 4965 5268 3839 \n",
"unique 4395 136 1896 \n",
"top http://www.cas.cn/ us Lima \n",
2022-02-14 13:34:42 +01:00
"freq 9 907 76 \n",
2021-07-23 12:38:56 +02:00
"\n",
" location_latitude location_longitude software geoname version \\\n",
2022-02-14 13:34:42 +01:00
"count 3798 3780 4700 4730 5444 \n",
"unique 2965 3003 31 126 53 \n",
"top 34.1607 -118.139 dspace geoname_2_US other \n",
2022-02-14 13:34:42 +01:00
"freq 25 25 2341 845 4841 \n",
2021-07-23 12:38:56 +02:00
"\n",
" subjects date \\\n",
2022-02-14 13:34:42 +01:00
"count 10941 5429 \n",
"unique 237 4898 \n",
"top L1 2006-05-04 10:48:14 \n",
2022-02-14 13:34:42 +01:00
"freq 362 99 \n",
2021-07-23 12:38:56 +02:00
"\n",
" note \\\n",
2022-02-14 13:34:42 +01:00
"count 218 \n",
"unique 210 \n",
"top ¿Quién puede depositar documentos en el reposi... \n",
"freq 2 \n",
2021-07-23 12:38:56 +02:00
"\n",
" suggestions activity_low \\\n",
2022-02-14 13:34:42 +01:00
"count 189 2288 \n",
"unique 173 72 \n",
"top This repository is hosted by the Texas Digital... 0 \n",
2022-02-14 13:34:42 +01:00
"freq 9 2012 \n",
2021-07-23 12:38:56 +02:00
"\n",
" activity_medium activity_high recordcount \\\n",
2022-02-14 13:34:42 +01:00
"count 2288 2288 2290 \n",
"unique 54 16 741 \n",
"top 0 0 100 \n",
2022-02-14 13:34:42 +01:00
"freq 2074 2210 730 \n",
2021-07-23 12:38:56 +02:00
"\n",
" recordhistory fulltexts_total \\\n",
2022-02-14 13:34:42 +01:00
"count 2288 270 \n",
"unique 1702 135 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 95 113 \n",
2021-07-23 12:38:56 +02:00
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
2022-02-14 13:34:42 +01:00
"count 258 270 258 7393 \n",
"unique 118 134 117 3 \n",
"top 0 0 0 celestial \n",
2022-02-14 13:34:42 +01:00
"freq 114 113 114 3741 \n",
"\n",
" registry_id submit_to submitted_to_name submitted_to_done \\\n",
2022-02-14 13:34:42 +01:00
"count 7258 730 205 205 \n",
"unique 5165 3 1 1 \n",
"top 184 celestial opendoar 2021-01-25 \n",
2022-02-14 13:34:42 +01:00
"freq 7 283 205 205 \n",
"\n",
" webometrics_rank webometrics_size webometrics_visibility \\\n",
"count 148 148 148 \n",
"unique 148 148 148 \n",
"top 1 6 1 \n",
"freq 1 1 1 \n",
"\n",
" webometrics_rich_files webometrics_scholar \\\n",
"count 148 148 \n",
"unique 146 143 \n",
"top 824 806 \n",
"freq 3 5 \n",
"\n",
" monthly_deposits total_deposits \\\n",
"count 756 756 \n",
"unique 346 342 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 387 387 \n",
"\n",
" association \n",
2022-02-14 13:34:42 +01:00
"count 237 \n",
"unique 2 \n",
"top russell_group \n",
2022-02-14 13:34:42 +01:00
"freq 144 "
2021-07-23 12:38:56 +02:00
]
},
"execution_count": 4,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-column summary (count / unique / top / freq) over every column.\n",
"roar_df.describe(include=\"all\")"
2021-07-23 12:38:56 +02:00
]
},
{
"cell_type": "code",
"execution_count": 5,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>606</td>\n",
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opendoar</td>\n",
" <td>166</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>1106</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roarmap</td>\n",
" <td>69</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source dir \\\n",
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
"3 606 NaN NaN NaN NaN NaN NaN \n",
"4 606 NaN NaN NaN NaN NaN NaN \n",
"\n",
" datestamp lastmod status_changed type \\\n",
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
"2 NaN NaN show NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"2 NaN NaN 0 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
"2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" title oai_pmh \\\n",
"2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n",
"2 NaN NaN NaN NaN TRUE TRUE \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" mandate organisation_title organisation_home_page location_country \\\n",
"2 NaN NaN NaN fr \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" location_city location_latitude location_longitude software geoname \\\n",
"2 NaN NaN NaN hal geoname_2_FR \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" version subjects date note suggestions activity_low \\\n",
"2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" activity_medium activity_high recordcount \\\n",
"2 0 2 675816 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
"2 NaN NaN NaN opendoar 166 \n",
"3 NaN NaN NaN celestial 1106 \n",
"4 NaN NaN NaN roarmap 69 \n",
"\n",
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
"2 NaN NaN NaN 1 \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"2 6 1 1 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" webometrics_scholar monthly_deposits \\\n",
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" total_deposits association \n",
"2 3063 NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
]
},
"execution_count": 5,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show every row stored for eprint 606; one eprint can span several rows\n",
"# (here one per registry_name value).\n",
"roar_df.loc[roar_df['eprintid'] == '606']"
]
},
{
"cell_type": "code",
"execution_count": 6,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" <tr>\n",
" <th>eprintid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>{nan, 633}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{archive, nan}</td>\n",
" <td>{1, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, disk0/00/00/00/01}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, 2011-07-18 05:40:07}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, subject}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, show}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, http://archivesic.ccsd.cnrs.fr/}</td>\n",
" <td>{nan, @RCHIVESIC }</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, fr}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, hal}</td>\n",
" <td>{nan, geoname_2_FR}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, other}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 2002-05-17 19:24:41}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, 25}</td>\n",
" <td>{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{opendoar, celestial}</td>\n",
" <td>{669, 58}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>{nan, 511}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{archive, nan}</td>\n",
" <td>{1, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{disk0/00/00/00/10, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{2011-07-18 05:40:13, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, institutional}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, show}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, http://www.diva-portal.org/mdh/}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, Academic Archive On-line (Mälardalen Uni...</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, http://www.diva-portal.org/oai/mdh/OAI}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, TRUE}</td>\n",
" <td>{nan, TRUE}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, se}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{Uppsala, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 59.8667}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{17.6333, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, diva}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, geoname_2_SE}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, other}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 2005-12-08 13:15:22}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 100}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{opendoar, celestial}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{258, 526}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1000</th>\n",
" <td>{274}</td>\n",
" <td>{archive}</td>\n",
" <td>{1}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{disk0/00/00/10/00}</td>\n",
" <td>{2010-01-06 13:45:01}</td>\n",
" <td>{2011-07-06 08:21:21}</td>\n",
" <td>{2010-01-06 13:45:01}</td>\n",
" <td>{subject}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{0}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{http://pam.pisharp.org/}</td>\n",
" <td>{PAM - Portuguese Archive of Mathematics}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{TRUE}</td>\n",
" <td>{TRUE}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{pt}</td>\n",
" <td>{Bellevue, WA}</td>\n",
" <td>{47.6034}</td>\n",
" <td>{-122.155}</td>\n",
" <td>{dspace}</td>\n",
" <td>{geoname_2_PT}</td>\n",
" <td>{other}</td>\n",
" <td>{nan}</td>\n",
" <td>{2006-05-04 10:48:14}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10001</th>\n",
" <td>{nan, 20}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{archive, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 91}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, disk0/00/01/00/01}</td>\n",
" <td>{nan, 2015-08-08 14:52:11}</td>\n",
" <td>{nan, 2016-03-21 19:44:01}</td>\n",
" <td>{nan, 2015-08-08 14:52:11}</td>\n",
" <td>{nan, subject}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, show}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, Klimawandel Dokumentenserver}</td>\n",
" <td>{http://edoc.sub.uni-hamburg.de/klimawandel/oa...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, The \"Documentenserver Klimawandel\" (Repo...</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, TRUE}</td>\n",
" <td>{nan, TRUE}</td>\n",
" <td>{nan, TRUE}</td>\n",
" <td>{nan, Climate Service Center 2.0, Helmholtz-Ze...</td>\n",
" <td>{nan, http://www.climateservicecenter.de/, htt...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, de}</td>\n",
" <td>{nan, Hamburg}</td>\n",
" <td>{nan, 53.5511}</td>\n",
" <td>{nan, 9.9937}</td>\n",
" <td>{nan, opus}</td>\n",
" <td>{nan, geoname_2_DE}</td>\n",
" <td>{nan, other}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{G1, GE, HD, S1, GF}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2015-07-02 08:08:31}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2022-02-14 13:34:42 +01:00
" <td>{nan, opendoar, celestial}</td>\n",
" <td>{3408, nan, 5881}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10008</th>\n",
" <td>{11}</td>\n",
" <td>{archive}</td>\n",
" <td>{404}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{disk0/00/01/00/08}</td>\n",
" <td>{2015-08-08 14:52:26}</td>\n",
" <td>{2016-03-21 19:43:51}</td>\n",
" <td>{2015-08-08 14:52:26}</td>\n",
" <td>{institutional}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{http://creativematter.skidmore.edu/}</td>\n",
" <td>{Creative Matter | Skidmore College Research}</td>\n",
" <td>{http://creativematter.skidmore.edu/do/oai/}</td>\n",
" <td>{nan}</td>\n",
" <td>{http://creativematter.skidmore.edu/recent.rss}</td>\n",
" <td>{nan}</td>\n",
" <td>{Welcome to Creative Matter, a repository for ...</td>\n",
" <td>{TRUE}</td>\n",
" <td>{FALSE}</td>\n",
" <td>{FALSE}</td>\n",
" <td>{Skidmore College}</td>\n",
" <td>{http://www.skidmore.edu/}</td>\n",
" <td>{us}</td>\n",
" <td>{Saratoga Springs}</td>\n",
" <td>{43.0961}</td>\n",
" <td>{-73.7818}</td>\n",
" <td>{bepress}</td>\n",
" <td>{geoname_2_US}</td>\n",
" <td>{other}</td>\n",
" <td>{nan}</td>\n",
" <td>{2015-07-06 17:35:50}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{celestial}</td>\n",
" <td>{5882}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rev_number eprint_status userid importid source \\\n",
"eprintid \n",
2022-02-14 13:34:42 +01:00
"1 {nan, 633} {archive, nan} {1, nan} {nan} {nan} \n",
"10 {nan, 511} {archive, nan} {1, nan} {nan} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {274} {archive} {1} {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, 20} {archive, nan} {nan, 91} {nan} {nan} \n",
2021-07-23 12:38:56 +02:00
"10008 {11} {archive} {404} {nan} {nan} \n",
"\n",
" dir datestamp \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n",
"10 {disk0/00/00/00/10, nan} {nan, 2010-01-06 13:43:48} \n",
2021-07-23 12:38:56 +02:00
"1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n",
"10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n",
"10008 {disk0/00/01/00/08} {2015-08-08 14:52:26} \n",
"\n",
" lastmod status_changed \\\n",
"eprintid \n",
"1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n",
2022-02-14 13:34:42 +01:00
"10 {2011-07-18 05:40:13, nan} {nan, 2010-01-06 13:43:48} \n",
2021-07-23 12:38:56 +02:00
"1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n",
"10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n",
"10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n",
"\n",
" type succeeds commentary metadata_visibility \\\n",
"eprintid \n",
2022-02-14 13:34:42 +01:00
"1 {nan, subject} {nan} {nan} {nan, show} \n",
"10 {nan, institutional} {nan} {nan} {nan, show} \n",
2021-07-23 12:38:56 +02:00
"1000 {subject} {nan} {nan} {show} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, subject} {nan} {nan} {nan, show} \n",
2021-07-23 12:38:56 +02:00
"10008 {institutional} {nan} {nan} {show} \n",
"\n",
" latitude longitude relation_type relation_uri item_issues_id \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} {nan} \n",
"\n",
" item_issues_type item_issues_description item_issues_timestamp \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
"eprintid \n",
"1 {nan} {nan, 0} {nan} {nan} \n",
"10 {nan} {nan, 0} {nan} {nan} \n",
"1000 {nan} {0} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} \n",
"\n",
" exemplar home_page \\\n",
"eprintid \n",
"1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n",
2022-02-14 13:34:42 +01:00
"10 {nan} {nan, http://www.diva-portal.org/mdh/} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {http://pam.pisharp.org/} \n",
"10001 {nan} {nan, http://edoc.sub.uni-hamburg.de/klimawand... \n",
2021-07-23 12:38:56 +02:00
"10008 {nan} {http://creativematter.skidmore.edu/} \n",
"\n",
" title \\\n",
"eprintid \n",
"1 {nan, @RCHIVESIC } \n",
2021-07-23 12:38:56 +02:00
"10 {nan, Academic Archive On-line (Mälardalen Uni... \n",
"1000 {PAM - Portuguese Archive of Mathematics} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, Klimawandel Dokumentenserver} \n",
2021-07-23 12:38:56 +02:00
"10008 {Creative Matter | Skidmore College Research} \n",
"\n",
" oai_pmh sword_endpoint \\\n",
"eprintid \n",
"1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n",
2021-07-26 11:15:14 +02:00
"10 {nan, http://www.diva-portal.org/oai/mdh/OAI} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10001 {http://edoc.sub.uni-hamburg.de/klimawandel/oa... {nan} \n",
2021-07-23 12:38:56 +02:00
"10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n",
"\n",
" rss_feed twitter_feed \\\n",
"eprintid \n",
"1 {nan} {nan} \n",
"10 {nan} {nan} \n",
"1000 {nan} {nan} \n",
"10001 {nan} {nan} \n",
"10008 {http://creativematter.skidmore.edu/recent.rss} {nan} \n",
"\n",
" description fulltext \\\n",
"eprintid \n",
"1 {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10 {nan} {nan, TRUE} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {TRUE} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {nan, TRUE} \n",
2021-07-23 12:38:56 +02:00
"10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n",
"\n",
" open_access mandate \\\n",
"eprintid \n",
"1 {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10 {nan, TRUE} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {TRUE} {nan} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, TRUE} {nan, TRUE} \n",
2021-07-23 12:38:56 +02:00
"10008 {FALSE} {FALSE} \n",
"\n",
" organisation_title \\\n",
"eprintid \n",
"1 {nan} \n",
"10 {nan} \n",
"1000 {nan} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, Climate Service Center 2.0, Helmholtz-Ze... \n",
2021-07-23 12:38:56 +02:00
"10008 {Skidmore College} \n",
"\n",
" organisation_home_page location_country \\\n",
"eprintid \n",
"1 {nan} {nan, fr} \n",
"10 {nan} {nan, se} \n",
"1000 {nan} {pt} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, http://www.climateservicecenter.de/, htt... {nan, de} \n",
2021-07-23 12:38:56 +02:00
"10008 {http://www.skidmore.edu/} {us} \n",
"\n",
" location_city location_latitude location_longitude \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10 {Uppsala, nan} {nan, 59.8667} {17.6333, nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {Bellevue, WA} {47.6034} {-122.155} \n",
"10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n",
"10008 {Saratoga Springs} {43.0961} {-73.7818} \n",
"\n",
" software geoname version \\\n",
"eprintid \n",
"1 {nan, hal} {nan, geoname_2_FR} {nan, other} \n",
2022-02-14 13:34:42 +01:00
"10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n",
2021-07-23 12:38:56 +02:00
"1000 {dspace} {geoname_2_PT} {other} \n",
"10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n",
"10008 {bepress} {geoname_2_US} {other} \n",
"\n",
" subjects date note suggestions \\\n",
"eprintid \n",
"1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n",
"10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n",
"1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10001 {G1, GE, HD, S1, GF} {nan, 2015-07-02 08:08:31} {nan} {nan} \n",
2021-07-23 12:38:56 +02:00
"10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n",
"\n",
" activity_low activity_medium activity_high recordcount \\\n",
"eprintid \n",
2022-02-14 13:34:42 +01:00
"1 {nan, 0} {nan, 0} {nan, 0} {nan, 25} \n",
2021-07-23 12:38:56 +02:00
"10 {nan, 0} {nan, 0} {nan, 0} {nan, 100} \n",
"1000 {nan} {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} \n",
"\n",
" recordhistory fulltexts_total \\\n",
"eprintid \n",
2022-02-14 13:34:42 +01:00
"1 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... {nan} \n",
"10 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1... {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {nan} \n",
"10001 {nan} {nan} \n",
"10008 {nan} {nan} \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" registry_name registry_id submit_to \\\n",
"eprintid \n",
2022-02-14 13:34:42 +01:00
"1 {opendoar, celestial} {669, 58} {nan} \n",
"10 {opendoar, celestial} {258, 526} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {nan} {nan} \n",
2022-02-14 13:34:42 +01:00
"10001 {nan, opendoar, celestial} {3408, nan, 5881} {nan} \n",
2021-07-23 12:38:56 +02:00
"10008 {celestial} {5882} {nan} \n",
"\n",
" submitted_to_name submitted_to_done webometrics_rank \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" webometrics_scholar monthly_deposits total_deposits association \n",
"eprintid \n",
"1 {nan} {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} "
]
},
"execution_count": 6,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df = roar_df.groupby('eprintid').aggregate(set)\n",
"roar_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" <tr>\n",
" <th>eprintid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>633</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/01</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:07</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/</td>\n",
" <td>@RCHIVESIC</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2002-05-17 19:24:41</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[opendoar, celestial]</td>\n",
" <td>[669, 58]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>511</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/10</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:13</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://www.diva-portal.org/mdh/</td>\n",
" <td>Academic Archive On-line (Mälardalen Universit...</td>\n",
" <td>http://www.diva-portal.org/oai/mdh/OAI</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>se</td>\n",
" <td>Uppsala</td>\n",
" <td>59.8667</td>\n",
" <td>17.6333</td>\n",
" <td>diva</td>\n",
" <td>geoname_2_SE</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2005-12-08 13:15:22</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[opendoar, celestial]</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[258, 526]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1000</th>\n",
" <td>274</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/10/00</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>2011-07-06 08:21:21</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://pam.pisharp.org/</td>\n",
" <td>PAM - Portuguese Archive of Mathematics</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>pt</td>\n",
" <td>Bellevue, WA</td>\n",
" <td>47.6034</td>\n",
" <td>-122.155</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_PT</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10001</th>\n",
" <td>20</td>\n",
" <td>archive</td>\n",
" <td>91</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>2016-03-21 19:44:01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/</td>\n",
" <td>Klimawandel Dokumentenserver</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/oai</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The \"Documentenserver Klimawandel\" (Repository...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[Climate Service Center 2.0, Helmholtz-Zentrum...</td>\n",
" <td>[http://www.climateservicecenter.de/, http://w...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>de</td>\n",
" <td>Hamburg</td>\n",
" <td>53.5511</td>\n",
" <td>9.9937</td>\n",
" <td>opus</td>\n",
" <td>geoname_2_DE</td>\n",
" <td>other</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[G1, S1, GF, GE, HD]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2015-07-02 08:08:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[opendoar, celestial]</td>\n",
" <td>[3408, 5881]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10008</th>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>404</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/08</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>2016-03-21 19:43:51</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/</td>\n",
" <td>Creative Matter | Skidmore College Research</td>\n",
" <td>http://creativematter.skidmore.edu/do/oai/</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/recent.rss</td>\n",
" <td>NaN</td>\n",
" <td>Welcome to Creative Matter, a repository for t...</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>Skidmore College</td>\n",
" <td>http://www.skidmore.edu/</td>\n",
" <td>us</td>\n",
" <td>Saratoga Springs</td>\n",
" <td>43.0961</td>\n",
" <td>-73.7818</td>\n",
" <td>bepress</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2015-07-06 17:35:50</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>5882</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rev_number eprint_status userid importid source dir \\\n",
"eprintid \n",
"1 633 archive 1 NaN NaN disk0/00/00/00/01 \n",
"10 511 archive 1 NaN NaN disk0/00/00/00/10 \n",
"1000 274 archive 1 NaN NaN disk0/00/00/10/00 \n",
"10001 20 archive 91 NaN NaN disk0/00/01/00/01 \n",
"10008 11 archive 404 NaN NaN disk0/00/01/00/08 \n",
"\n",
" datestamp lastmod status_changed \\\n",
"eprintid \n",
"1 2010-01-06 13:43:48 2011-07-18 05:40:07 2010-01-06 13:43:48 \n",
"10 2010-01-06 13:43:48 2011-07-18 05:40:13 2010-01-06 13:43:48 \n",
"1000 2010-01-06 13:45:01 2011-07-06 08:21:21 2010-01-06 13:45:01 \n",
"10001 2015-08-08 14:52:11 2016-03-21 19:44:01 2015-08-08 14:52:11 \n",
"10008 2015-08-08 14:52:26 2016-03-21 19:43:51 2015-08-08 14:52:26 \n",
"\n",
" type succeeds commentary metadata_visibility latitude \\\n",
"eprintid \n",
"1 subject NaN NaN show NaN \n",
"10 institutional NaN NaN show NaN \n",
"1000 subject NaN NaN show NaN \n",
"10001 subject NaN NaN show NaN \n",
"10008 institutional NaN NaN show NaN \n",
"\n",
" longitude relation_type relation_uri item_issues_id \\\n",
"eprintid \n",
"1 NaN NaN NaN NaN \n",
"10 NaN NaN NaN NaN \n",
"1000 NaN NaN NaN NaN \n",
"10001 NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN \n",
"\n",
" item_issues_type item_issues_description item_issues_timestamp \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
"eprintid \n",
"1 NaN 0 NaN NaN \n",
"10 NaN 0 NaN NaN \n",
"1000 NaN 0 NaN NaN \n",
"10001 NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN \n",
"\n",
" exemplar home_page \\\n",
"eprintid \n",
"1 NaN http://archivesic.ccsd.cnrs.fr/ \n",
"10 NaN http://www.diva-portal.org/mdh/ \n",
"1000 NaN http://pam.pisharp.org/ \n",
"10001 NaN http://edoc.sub.uni-hamburg.de/klimawandel/ \n",
"10008 NaN http://creativematter.skidmore.edu/ \n",
"\n",
" title \\\n",
"eprintid \n",
"1 @RCHIVESIC \n",
"10 Academic Archive On-line (Mälardalen Universit... \n",
"1000 PAM - Portuguese Archive of Mathematics \n",
"10001 Klimawandel Dokumentenserver \n",
"10008 Creative Matter | Skidmore College Research \n",
"\n",
" oai_pmh sword_endpoint \\\n",
"eprintid \n",
"1 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n",
"10 http://www.diva-portal.org/oai/mdh/OAI NaN \n",
"1000 NaN NaN \n",
"10001 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n",
"10008 http://creativematter.skidmore.edu/do/oai/ NaN \n",
"\n",
" rss_feed twitter_feed \\\n",
"eprintid \n",
"1 NaN NaN \n",
"10 NaN NaN \n",
"1000 NaN NaN \n",
"10001 NaN NaN \n",
"10008 http://creativematter.skidmore.edu/recent.rss NaN \n",
"\n",
" description fulltext \\\n",
"eprintid \n",
"1 NaN NaN \n",
"10 NaN TRUE \n",
"1000 NaN TRUE \n",
"10001 The \"Documentenserver Klimawandel\" (Repository... TRUE \n",
"10008 Welcome to Creative Matter, a repository for t... TRUE \n",
"\n",
" open_access mandate \\\n",
"eprintid \n",
"1 NaN NaN \n",
"10 TRUE NaN \n",
"1000 TRUE NaN \n",
"10001 TRUE TRUE \n",
"10008 FALSE FALSE \n",
"\n",
" organisation_title \\\n",
"eprintid \n",
"1 NaN \n",
"10 NaN \n",
"1000 NaN \n",
2022-02-14 13:34:42 +01:00
"10001 [Climate Service Center 2.0, Helmholtz-Zentrum... \n",
2021-07-23 12:38:56 +02:00
"10008 Skidmore College \n",
"\n",
" organisation_home_page location_country \\\n",
"eprintid \n",
"1 NaN fr \n",
"10 NaN se \n",
"1000 NaN pt \n",
2022-02-14 13:34:42 +01:00
"10001 [http://www.climateservicecenter.de/, http://w... de \n",
2021-07-23 12:38:56 +02:00
"10008 http://www.skidmore.edu/ us \n",
"\n",
" location_city location_latitude location_longitude software \\\n",
"eprintid \n",
"1 NaN NaN NaN hal \n",
"10 Uppsala 59.8667 17.6333 diva \n",
"1000 Bellevue, WA 47.6034 -122.155 dspace \n",
"10001 Hamburg 53.5511 9.9937 opus \n",
"10008 Saratoga Springs 43.0961 -73.7818 bepress \n",
"\n",
" geoname version subjects date \\\n",
"eprintid \n",
"1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n",
"10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n",
"1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n",
2022-02-14 13:34:42 +01:00
"10001 geoname_2_DE other [G1, S1, GF, GE, HD] 2015-07-02 08:08:31 \n",
2021-07-23 12:38:56 +02:00
"10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n",
"\n",
" note suggestions activity_low activity_medium activity_high \\\n",
"eprintid \n",
"1 NaN NaN 0 0 0 \n",
"10 NaN NaN 0 0 0 \n",
"1000 NaN NaN NaN NaN NaN \n",
"10001 NaN NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN NaN \n",
"\n",
" recordcount recordhistory \\\n",
"eprintid \n",
"1 25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"10 100 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... \n",
"1000 NaN NaN \n",
"10001 NaN NaN \n",
"10008 NaN NaN \n",
"\n",
" fulltexts_total fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
"eprintid \n",
"1 NaN NaN NaN NaN \n",
"10 NaN NaN NaN NaN \n",
"1000 NaN NaN NaN NaN \n",
"10001 NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN \n",
"\n",
" registry_name registry_id submit_to submitted_to_name \\\n",
"eprintid \n",
2022-02-14 13:34:42 +01:00
"1 [opendoar, celestial] [669, 58] NaN NaN \n",
"10 [opendoar, celestial] [258, 526] NaN NaN \n",
2021-07-23 12:38:56 +02:00
"1000 NaN NaN NaN NaN \n",
2022-02-14 13:34:42 +01:00
"10001 [opendoar, celestial] [3408, 5881] NaN NaN \n",
2021-07-23 12:38:56 +02:00
"10008 celestial 5882 NaN NaN \n",
"\n",
" submitted_to_done webometrics_rank webometrics_size \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" monthly_deposits total_deposits association \n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN "
]
},
"execution_count": 7,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def value_or_list(cell_set):\n",
" copy = set(cell_set)\n",
" copy.discard(np.nan) \n",
" if len(copy) == 0:\n",
" return np.nan\n",
" if len(copy) == 1:\n",
" return copy.pop()\n",
" return list(copy)\n",
" \n",
"roar_df = roar_df.applymap(value_or_list)\n",
"roar_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2022-02-14 13:34:42 +01:00
" <th>4259</th>\n",
2021-07-23 12:38:56 +02:00
" <td>606</td>\n",
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[opendoar, celestial, roarmap]</td>\n",
" <td>[166, 1106, 69]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
2022-02-14 13:34:42 +01:00
"4259 606 657 archive 1 NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" dir datestamp lastmod \\\n",
2022-02-14 13:34:42 +01:00
"4259 disk0/00/00/06/06 2010-01-06 13:44:31 2016-04-17 21:53:14 \n",
2021-07-23 12:38:56 +02:00
"\n",
" status_changed type succeeds commentary metadata_visibility \\\n",
2022-02-14 13:34:42 +01:00
"4259 2010-01-06 13:44:31 subject NaN NaN show \n",
2021-07-23 12:38:56 +02:00
"\n",
" latitude longitude relation_type relation_uri item_issues_id \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN NaN NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_type item_issues_description item_issues_timestamp \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN 0 NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" exemplar home_page title \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN http://hal.archives-ouvertes.fr/ HAL: Hyper Article en Ligne \n",
2021-07-23 12:38:56 +02:00
"\n",
" oai_pmh sword_endpoint rss_feed \\\n",
2022-02-14 13:34:42 +01:00
"4259 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" twitter_feed description fulltext open_access mandate organisation_title \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN NaN TRUE TRUE NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" organisation_home_page location_country location_city location_latitude \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN fr NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" location_longitude software geoname version subjects \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN hal geoname_2_FR other NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" date note suggestions activity_low activity_medium \\\n",
2022-02-14 13:34:42 +01:00
"4259 1998-11-02 11:53:57 NaN NaN 0 0 \n",
2021-07-23 12:38:56 +02:00
"\n",
" activity_high recordcount \\\n",
2022-02-14 13:34:42 +01:00
"4259 2 675816 \n",
2021-07-23 12:38:56 +02:00
"\n",
" recordhistory fulltexts_total \\\n",
2022-02-14 13:34:42 +01:00
"4259 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" registry_name registry_id submit_to \\\n",
2022-02-14 13:34:42 +01:00
"4259 [opendoar, celestial, roarmap] [166, 1106, 69] NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n",
2022-02-14 13:34:42 +01:00
"4259 NaN NaN 1 6 \n",
2021-07-23 12:38:56 +02:00
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
2022-02-14 13:34:42 +01:00
"4259 1 1 1 \n",
2021-07-23 12:38:56 +02:00
"\n",
" monthly_deposits total_deposits \\\n",
2022-02-14 13:34:42 +01:00
"4259 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 3063 \n",
2021-07-23 12:38:56 +02:00
"\n",
" association \n",
2022-02-14 13:34:42 +01:00
"4259 NaN "
2021-07-23 12:38:56 +02:00
]
},
"execution_count": 8,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.reset_index(inplace=True)\n",
"roar_df[roar_df.eprintid == '606']"
]
},
{
"cell_type": "code",
"execution_count": 9,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>5444</td>\n",
" <td>108</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2242</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>268</td>\n",
" <td>5437</td>\n",
" <td>5442</td>\n",
" <td>4332</td>\n",
" <td>178</td>\n",
" <td>1538</td>\n",
" <td>116</td>\n",
" <td>3837</td>\n",
" <td>4197</td>\n",
" <td>4197</td>\n",
" <td>3746</td>\n",
" <td>4460</td>\n",
" <td>4286</td>\n",
" <td>5138</td>\n",
" <td>3714</td>\n",
" <td>3725</td>\n",
" <td>3708</td>\n",
" <td>4700</td>\n",
" <td>4730</td>\n",
" <td>5444</td>\n",
" <td>1289</td>\n",
" <td>5429</td>\n",
" <td>218</td>\n",
" <td>189</td>\n",
" <td>2288</td>\n",
" <td>2288</td>\n",
" <td>2288</td>\n",
" <td>2290</td>\n",
" <td>2288</td>\n",
2021-07-23 12:38:56 +02:00
" <td>270</td>\n",
" <td>258</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
2022-02-14 13:34:42 +01:00
" <td>4605</td>\n",
" <td>4580</td>\n",
" <td>375</td>\n",
2021-07-23 12:38:56 +02:00
" <td>205</td>\n",
" <td>205</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>756</td>\n",
" <td>756</td>\n",
2022-02-14 13:34:42 +01:00
" <td>223</td>\n",
2021-07-23 12:38:56 +02:00
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>660</td>\n",
2021-07-23 12:38:56 +02:00
" <td>1</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2189</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5444</td>\n",
" <td>4198</td>\n",
" <td>4043</td>\n",
" <td>4230</td>\n",
2021-07-23 12:38:56 +02:00
" <td>12</td>\n",
2022-02-14 13:34:42 +01:00
" <td>108</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>48</td>\n",
" <td>5</td>\n",
" <td>62</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5271</td>\n",
" <td>5143</td>\n",
" <td>4059</td>\n",
" <td>172</td>\n",
" <td>1485</td>\n",
" <td>112</td>\n",
" <td>3359</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3858</td>\n",
" <td>3831</td>\n",
" <td>144</td>\n",
" <td>1884</td>\n",
" <td>2923</td>\n",
" <td>2953</td>\n",
2021-07-23 12:38:56 +02:00
" <td>31</td>\n",
" <td>126</td>\n",
" <td>53</td>\n",
2022-02-14 13:34:42 +01:00
" <td>938</td>\n",
" <td>4898</td>\n",
" <td>210</td>\n",
" <td>173</td>\n",
2021-07-23 12:38:56 +02:00
" <td>72</td>\n",
" <td>54</td>\n",
" <td>16</td>\n",
" <td>741</td>\n",
2022-02-14 13:34:42 +01:00
" <td>1702</td>\n",
2021-07-23 12:38:56 +02:00
" <td>135</td>\n",
" <td>118</td>\n",
" <td>134</td>\n",
" <td>117</td>\n",
2022-02-14 13:34:42 +01:00
" <td>9</td>\n",
" <td>4259</td>\n",
2021-07-23 12:38:56 +02:00
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>146</td>\n",
" <td>143</td>\n",
" <td>346</td>\n",
" <td>342</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>1</td>\n",
2021-07-23 12:38:56 +02:00
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/01</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-06 08:24:53</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
" <td>10164</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>bad_oai_pmh_url_0</td>\n",
" <td>duplicate_title</td>\n",
" <td>Duplicate title to &lt;xhtml:table xmlns:xhtml=\"h...</td>\n",
" <td>2010-01-13 10:44:49</td>\n",
" <td>discovered</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FALSE</td>\n",
" <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>http://kce.docressources.info/ws/PMBWs_2</td>\n",
" <td>http://producao.usp.br/sword/servicedocument</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
" <td>http://my.indexcopernicus.com/fredemoreno</td>\n",
2021-07-23 12:38:56 +02:00
" <td>info:other:archives.eprints.org:import</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>Chinese Academy of Science (中国科学院)</td>\n",
" <td>http://www.cas.cn/</td>\n",
" <td>us</td>\n",
" <td>Lima</td>\n",
" <td>34.1607</td>\n",
" <td>-118.139</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>K1</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
2022-02-14 13:34:42 +01:00
" <td>¿Quién puede depositar documentos en el reposi...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2022-02-14 13:34:42 +01:00
" <td>[opendoar, celestial]</td>\n",
" <td>[1879, 2246]</td>\n",
" <td>[opendoar, celestial, roarmap]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>opendoar</td>\n",
" <td>2021-01-25</td>\n",
" <td>24</td>\n",
" <td>46</td>\n",
" <td>20</td>\n",
2021-07-23 12:38:56 +02:00
" <td>824</td>\n",
" <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>russell_group</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
2022-02-14 13:34:42 +01:00
" <td>333</td>\n",
" <td>5444</td>\n",
" <td>1330</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>16</td>\n",
2022-02-14 13:34:42 +01:00
" <td>3853</td>\n",
2021-07-23 12:38:56 +02:00
" <td>1</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>5402</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>15</td>\n",
" <td>33</td>\n",
" <td>2</td>\n",
" <td>45</td>\n",
" <td>38</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2201</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2022-02-14 13:34:42 +01:00
" <td>261</td>\n",
2021-07-23 12:38:56 +02:00
" <td>4</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>112</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2805</td>\n",
" <td>2696</td>\n",
" <td>2748</td>\n",
2021-07-23 12:38:56 +02:00
" <td>9</td>\n",
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>891</td>\n",
" <td>74</td>\n",
2021-07-23 12:38:56 +02:00
" <td>25</td>\n",
" <td>25</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2341</td>\n",
" <td>845</td>\n",
" <td>4841</td>\n",
2021-07-23 12:38:56 +02:00
" <td>53</td>\n",
" <td>99</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2012</td>\n",
" <td>2074</td>\n",
" <td>2210</td>\n",
" <td>730</td>\n",
2021-07-23 12:38:56 +02:00
" <td>95</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
2022-02-14 13:34:42 +01:00
" <td>2106</td>\n",
2021-07-23 12:38:56 +02:00
" <td>4</td>\n",
2022-02-14 13:34:42 +01:00
" <td>119</td>\n",
2021-07-23 12:38:56 +02:00
" <td>205</td>\n",
" <td>205</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>387</td>\n",
" <td>387</td>\n",
2022-02-14 13:34:42 +01:00
" <td>130</td>\n",
2021-07-23 12:38:56 +02:00
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
2021-07-23 12:38:56 +02:00
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-07-23 12:38:56 +02:00
" eprintid rev_number eprint_status userid importid source \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 5444 5444 0.0 0.0 \n",
"unique 5444 660 1 2189 NaN NaN \n",
"top 1 11 archive 1 NaN NaN \n",
2022-02-14 13:34:42 +01:00
"freq 1 333 5444 1330 NaN NaN \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 5444 \n",
"unique 5444 4198 4043 \n",
"top disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-06 08:24:53 \n",
2021-07-23 12:38:56 +02:00
"freq 1 16 8 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" status_changed type succeeds commentary \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 5444 108 0.0 \n",
"unique 4230 12 108 NaN \n",
"top 2010-01-06 13:43:48 institutional 10164 NaN \n",
2022-02-14 13:34:42 +01:00
"freq 16 3853 1 NaN \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
2022-02-14 13:34:42 +01:00
"count 5444 0.0 0.0 0.0 0.0 \n",
2021-07-23 12:38:56 +02:00
"unique 2 NaN NaN NaN NaN \n",
"top show NaN NaN NaN NaN \n",
2022-02-14 13:34:42 +01:00
"freq 5402 NaN NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" item_issues_id item_issues_type \\\n",
"count 63 63 \n",
"unique 48 5 \n",
"top bad_oai_pmh_url_0 duplicate_title \n",
"freq 15 33 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" item_issues_description \\\n",
"count 63 \n",
"unique 62 \n",
"top Duplicate title to <xhtml:table xmlns:xhtml=\"h... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"count 63 63 0.0 \n",
"unique 4 3 NaN \n",
"top 2010-01-13 10:44:49 discovered NaN \n",
"freq 45 38 NaN \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
2022-02-14 13:34:42 +01:00
"count 0.0 0.0 2242 \n",
2021-07-23 12:38:56 +02:00
"unique NaN NaN 4 \n",
"top NaN NaN 0 \n",
2022-02-14 13:34:42 +01:00
"freq NaN NaN 2201 \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
2022-02-14 13:34:42 +01:00
"count 0.0 0.0 268 5437 \n",
"unique NaN NaN 2 5271 \n",
2021-07-23 12:38:56 +02:00
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
2022-02-14 13:34:42 +01:00
"freq NaN NaN 261 4 \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" title oai_pmh \\\n",
2022-02-14 13:34:42 +01:00
"count 5442 4332 \n",
"unique 5143 4059 \n",
2021-07-23 12:38:56 +02:00
"top Repositorio Institucional http://kce.docressources.info/ws/PMBWs_2 \n",
"freq 7 4 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" sword_endpoint \\\n",
2022-02-14 13:34:42 +01:00
"count 178 \n",
"unique 172 \n",
"top http://producao.usp.br/sword/servicedocument \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" rss_feed \\\n",
2022-02-14 13:34:42 +01:00
"count 1538 \n",
"unique 1485 \n",
2021-07-23 12:38:56 +02:00
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
"freq 5 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" twitter_feed \\\n",
2022-02-14 13:34:42 +01:00
"count 116 \n",
"unique 112 \n",
"top http://my.indexcopernicus.com/fredemoreno \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" description fulltext open_access mandate \\\n",
2022-02-14 13:34:42 +01:00
"count 3837 4197 4197 3746 \n",
"unique 3359 2 2 2 \n",
"top info:other:archives.eprints.org:import TRUE TRUE FALSE \n",
2022-02-14 13:34:42 +01:00
"freq 112 2805 2696 2748 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" organisation_title organisation_home_page \\\n",
2022-02-14 13:34:42 +01:00
"count 4460 4286 \n",
"unique 3858 3831 \n",
"top Chinese Academy of Science (中国科学院) http://www.cas.cn/ \n",
"freq 9 9 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" location_country location_city location_latitude location_longitude \\\n",
2022-02-14 13:34:42 +01:00
"count 5138 3714 3725 3708 \n",
"unique 144 1884 2923 2953 \n",
"top us Lima 34.1607 -118.139 \n",
2022-02-14 13:34:42 +01:00
"freq 891 74 25 25 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" software geoname version subjects date \\\n",
2022-02-14 13:34:42 +01:00
"count 4700 4730 5444 1289 5429 \n",
"unique 31 126 53 938 4898 \n",
"top dspace geoname_2_US other K1 2006-05-04 10:48:14 \n",
2022-02-14 13:34:42 +01:00
"freq 2341 845 4841 53 99 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" note \\\n",
2022-02-14 13:34:42 +01:00
"count 218 \n",
"unique 210 \n",
"top ¿Quién puede depositar documentos en el reposi... \n",
2021-07-23 12:38:56 +02:00
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" suggestions activity_low \\\n",
2022-02-14 13:34:42 +01:00
"count 189 2288 \n",
"unique 173 72 \n",
2021-07-23 12:38:56 +02:00
"top This repository is hosted by the Texas Digital... 0 \n",
2022-02-14 13:34:42 +01:00
"freq 9 2012 \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" activity_medium activity_high recordcount \\\n",
2022-02-14 13:34:42 +01:00
"count 2288 2288 2290 \n",
2021-07-23 12:38:56 +02:00
"unique 54 16 741 \n",
"top 0 0 100 \n",
2022-02-14 13:34:42 +01:00
"freq 2074 2210 730 \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
2022-02-14 13:34:42 +01:00
"count 2288 270 \n",
"unique 1702 135 \n",
2021-07-23 12:38:56 +02:00
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 95 113 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
2022-02-14 13:34:42 +01:00
"count 258 270 258 4605 \n",
"unique 118 134 117 9 \n",
"top 0 0 0 [opendoar, celestial] \n",
"freq 114 113 114 2106 \n",
2021-07-23 12:38:56 +02:00
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
2022-02-14 13:34:42 +01:00
" registry_id submit_to submitted_to_name \\\n",
"count 4580 375 205 \n",
"unique 4259 7 1 \n",
"top [1879, 2246] [opendoar, celestial, roarmap] opendoar \n",
"freq 4 119 205 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" submitted_to_done webometrics_rank webometrics_size \\\n",
"count 205 148 148 \n",
"unique 1 148 148 \n",
"top 2021-01-25 24 46 \n",
2021-07-23 12:38:56 +02:00
"freq 205 1 1 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"count 148 148 148 \n",
"unique 148 146 143 \n",
"top 20 824 806 \n",
2021-07-23 12:38:56 +02:00
"freq 1 3 5 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" monthly_deposits total_deposits \\\n",
"count 756 756 \n",
"unique 346 342 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 387 387 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" association \n",
2022-02-14 13:34:42 +01:00
"count 223 \n",
2021-07-23 12:38:56 +02:00
"unique 3 \n",
"top russell_group \n",
2022-02-14 13:34:42 +01:00
"freq 130 \n",
2021-07-23 12:38:56 +02:00
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN "
2021-07-22 11:35:40 +02:00
]
},
"execution_count": 9,
2021-07-22 11:35:40 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 10,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-07-23 12:38:56 +02:00
"eprintid 0\n",
"rev_number 0\n",
"eprint_status 0\n",
"userid 0\n",
2022-02-14 13:34:42 +01:00
"importid 5444\n",
"source 5444\n",
2021-07-23 12:38:56 +02:00
"dir 0\n",
"datestamp 0\n",
"lastmod 0\n",
"status_changed 0\n",
"type 0\n",
2022-02-14 13:34:42 +01:00
"succeeds 5336\n",
"commentary 5444\n",
2021-07-23 12:38:56 +02:00
"metadata_visibility 0\n",
2022-02-14 13:34:42 +01:00
"latitude 5444\n",
"longitude 5444\n",
"relation_type 5444\n",
"relation_uri 5444\n",
"item_issues_id 5381\n",
"item_issues_type 5381\n",
"item_issues_description 5381\n",
"item_issues_timestamp 5381\n",
"item_issues_status 5381\n",
"item_issues_reported_by 5444\n",
"item_issues_resolved_by 5444\n",
"item_issues_comment 5444\n",
"item_issues_count 3202\n",
"sword_depositor 5444\n",
"sword_slug 5444\n",
"exemplar 5176\n",
2021-07-23 12:38:56 +02:00
"home_page 7\n",
"title 2\n",
2022-02-14 13:34:42 +01:00
"oai_pmh 1112\n",
"sword_endpoint 5266\n",
"rss_feed 3906\n",
"twitter_feed 5328\n",
"description 1607\n",
"fulltext 1247\n",
"open_access 1247\n",
"mandate 1698\n",
2021-07-23 12:38:56 +02:00
"dtype: int64"
2021-07-22 11:35:40 +02:00
]
},
"execution_count": 10,
2021-07-22 11:35:40 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-07-23 12:38:56 +02:00
"roar_df.isna().sum()[:40]"
2021-07-22 11:35:40 +02:00
]
},
{
"cell_type": "code",
"execution_count": 11,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2022-02-14 13:34:42 +01:00
"organisation_title 984\n",
"organisation_home_page 1158\n",
"location_country 306\n",
"location_city 1730\n",
"location_latitude 1719\n",
"location_longitude 1736\n",
"software 744\n",
"geoname 714\n",
2021-07-23 12:38:56 +02:00
"version 0\n",
2022-02-14 13:34:42 +01:00
"subjects 4155\n",
2021-07-23 12:38:56 +02:00
"date 15\n",
2022-02-14 13:34:42 +01:00
"note 5226\n",
"suggestions 5255\n",
"activity_low 3156\n",
"activity_medium 3156\n",
"activity_high 3156\n",
"recordcount 3154\n",
"recordhistory 3156\n",
"fulltexts_total 5174\n",
"fulltexts_docs 5186\n",
"fulltexts_rtotal 5174\n",
"fulltexts_rdocs 5186\n",
"registry_name 839\n",
"registry_id 864\n",
"submit_to 5069\n",
"submitted_to_name 5239\n",
"submitted_to_done 5239\n",
"webometrics_rank 5296\n",
"webometrics_size 5296\n",
"webometrics_visibility 5296\n",
"webometrics_rich_files 5296\n",
"webometrics_scholar 5296\n",
"monthly_deposits 4688\n",
"total_deposits 4688\n",
"association 5221\n",
2021-07-23 12:38:56 +02:00
"dtype: int64"
2021-07-22 11:35:40 +02:00
]
},
"execution_count": 11,
2021-07-22 11:35:40 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-07-23 12:38:56 +02:00
"roar_df.isna().sum()[40:]"
2021-07-22 11:35:40 +02:00
]
2021-07-23 12:38:56 +02:00
},
2021-07-26 11:15:14 +02:00
{
"cell_type": "code",
"execution_count": 12,
2021-07-26 11:15:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"type\n",
"database 74\n",
"demonstration 20\n",
2022-02-14 13:34:42 +01:00
"institutional 3853\n",
"journal 125\n",
"learning 77\n",
2022-02-14 13:34:42 +01:00
"multi 143\n",
"opendata 41\n",
2022-02-14 13:34:42 +01:00
"other 410\n",
"researchdata 55\n",
"subject 295\n",
"theses 349\n",
"webobservatory 2\n",
"dtype: int64"
2021-07-26 11:15:14 +02:00
]
},
"execution_count": 12,
2021-07-26 11:15:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(roar_df.type).groupby('type').size()"
2021-07-26 11:15:14 +02:00
]
},
{
"cell_type": "code",
"execution_count": 13,
2021-07-26 11:15:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"open_access\n",
2022-02-14 13:34:42 +01:00
"FALSE 1501\n",
"TRUE 2696\n",
"dtype: int64"
2021-07-26 11:15:14 +02:00
]
},
"execution_count": 13,
2021-07-26 11:15:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(roar_df.open_access).groupby('open_access').size()"
2021-07-26 11:15:14 +02:00
]
},
{
"cell_type": "code",
"execution_count": 14,
2021-07-26 11:15:14 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"mandate\n",
2022-02-14 13:34:42 +01:00
"FALSE 2748\n",
"TRUE 998\n",
"dtype: int64"
2021-07-26 11:15:14 +02:00
]
},
"execution_count": 14,
2021-07-26 11:15:14 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(roar_df.mandate).groupby('mandate').size()"
2021-07-26 11:15:14 +02:00
]
},
2021-07-23 12:38:56 +02:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
2021-07-22 11:35:40 +02:00
}
],
"metadata": {
"kernelspec": {
2022-02-14 13:34:42 +01:00
"display_name": "Python 3 (ipykernel)",
2021-07-22 11:35:40 +02:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}