registries_analysis/notebooks/01.3-exploration-roar.ipynb

5584 lines
229 KiB
Plaintext
Raw Normal View History

2021-07-22 11:35:40 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Loading datasets"
]
},
{
"cell_type": "code",
2021-07-23 12:38:56 +02:00
"execution_count": 2,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2021-07-23 12:38:56 +02:00
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
2021-07-22 11:35:40 +02:00
" <th>type</th>\n",
2021-07-23 12:38:56 +02:00
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2021-07-23 12:38:56 +02:00
" <td>921</td>\n",
" <td>517</td>\n",
" <td>archive</td>\n",
2021-07-22 11:35:40 +02:00
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>disk0/00/00/09/21</td>\n",
" <td>2010-01-06 13:44:55</td>\n",
" <td>2016-04-17 21:54:11</td>\n",
" <td>2010-01-06 13:44:55</td>\n",
" <td>theses</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>show</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>FALSE</td>\n",
" <td>http://alcme.oclc.org/ndltd/index.html</td>\n",
" <td>Networked Digital Library of Theses and Disser...</td>\n",
" <td>http://alcme.oclc.org/ndltd/servlet/OAIHandler</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>us</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>etddb</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2006-04-18</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1533400</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>5</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1489</td>\n",
2021-07-23 12:38:56 +02:00
" <td>556</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/14/89</td>\n",
" <td>2010-01-06 13:46:05</td>\n",
" <td>2016-04-17 21:49:00</td>\n",
" <td>2010-01-06 13:46:05</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>http://prensahistorica.mcu.es/prensahistorica/...</td>\n",
" <td>Virtual Library of Historical Press</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://prensahistorica.mcu.es/prensahistorica/...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The Virtual Library of Historical Press is the...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>es</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>geoname_2_ES</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2006-04-04 00:00:00</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1008486</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>858</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>47500</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>606</td>\n",
2021-07-23 12:38:56 +02:00
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>fr</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opendoar</td>\n",
" <td>166</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>1106</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roarmap</td>\n",
" <td>69</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source dir \\\n",
"0 921 517 archive 1 NaN NaN disk0/00/00/09/21 \n",
"1 1489 556 archive 1 NaN NaN disk0/00/00/14/89 \n",
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
"3 606 NaN NaN NaN NaN NaN NaN \n",
"4 606 NaN NaN NaN NaN NaN NaN \n",
"\n",
" datestamp lastmod status_changed type \\\n",
"0 2010-01-06 13:44:55 2016-04-17 21:54:11 2010-01-06 13:44:55 theses \n",
"1 2010-01-06 13:46:05 2016-04-17 21:49:00 2010-01-06 13:46:05 other \n",
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
"0 NaN NaN show NaN NaN NaN \n",
"1 NaN NaN show NaN NaN NaN \n",
"2 NaN NaN show NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"0 NaN NaN 0 \n",
"1 NaN NaN 0 \n",
"2 NaN NaN 0 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar \\\n",
"0 NaN NaN FALSE \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" home_page \\\n",
"0 http://alcme.oclc.org/ndltd/index.html \n",
"1 http://prensahistorica.mcu.es/prensahistorica/... \n",
"2 http://hal.archives-ouvertes.fr/ \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" title \\\n",
"0 Networked Digital Library of Theses and Disser... \n",
"1 Virtual Library of Historical Press \n",
"2 HAL: Hyper Article en Ligne \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" oai_pmh sword_endpoint rss_feed \\\n",
"0 http://alcme.oclc.org/ndltd/servlet/OAIHandler NaN NaN \n",
"1 http://prensahistorica.mcu.es/prensahistorica/... NaN NaN \n",
"2 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" twitter_feed description fulltext \\\n",
"0 NaN NaN TRUE \n",
"1 NaN The Virtual Library of Historical Press is the... TRUE \n",
"2 NaN NaN TRUE \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" open_access mandate organisation_title organisation_home_page \\\n",
"0 TRUE NaN NaN NaN \n",
"1 TRUE NaN NaN NaN \n",
"2 TRUE NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" location_country location_city location_latitude location_longitude \\\n",
"0 us NaN NaN NaN \n",
"1 es NaN NaN NaN \n",
"2 fr NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" software geoname version subjects date note \\\n",
"0 etddb geoname_2_US other NaN 2006-04-18 NaN \n",
"1 other geoname_2_ES other NaN 2006-04-04 00:00:00 NaN \n",
"2 hal geoname_2_FR other NaN 1998-11-02 11:53:57 NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" suggestions activity_low activity_medium activity_high recordcount \\\n",
"0 NaN 0 0 0 1533400 \n",
"1 NaN 0 0 0 1008486 \n",
"2 NaN 0 0 2 675816 \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5... NaN \n",
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761... NaN \n",
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
"0 NaN NaN NaN celestial 5 \n",
"1 NaN NaN NaN celestial 858 \n",
"2 NaN NaN NaN opendoar 166 \n",
"3 NaN NaN NaN celestial 1106 \n",
"4 NaN NaN NaN roarmap 69 \n",
"\n",
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN 1 \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 6 1 1 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" webometrics_scholar monthly_deposits \\\n",
"0 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"1 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" total_deposits association \n",
"0 0 NaN \n",
"1 47500 NaN \n",
"2 3063 NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n",
"roar_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['eprintid', 'rev_number', 'eprint_status', 'userid', 'importid',\n",
" 'source', 'dir', 'datestamp', 'lastmod', 'status_changed', 'type',\n",
" 'succeeds', 'commentary', 'metadata_visibility', 'latitude',\n",
" 'longitude', 'relation_type', 'relation_uri', 'item_issues_id',\n",
" 'item_issues_type', 'item_issues_description', 'item_issues_timestamp',\n",
" 'item_issues_status', 'item_issues_reported_by',\n",
" 'item_issues_resolved_by', 'item_issues_comment', 'item_issues_count',\n",
" 'sword_depositor', 'sword_slug', 'exemplar', 'home_page', 'title',\n",
" 'oai_pmh', 'sword_endpoint', 'rss_feed', 'twitter_feed', 'description',\n",
" 'fulltext', 'open_access', 'mandate', 'organisation_title',\n",
" 'organisation_home_page', 'location_country', 'location_city',\n",
" 'location_latitude', 'location_longitude', 'software', 'geoname',\n",
" 'version', 'subjects', 'date', 'note', 'suggestions', 'activity_low',\n",
" 'activity_medium', 'activity_high', 'recordcount', 'recordhistory',\n",
" 'fulltexts_total', 'fulltexts_docs', 'fulltexts_rtotal',\n",
" 'fulltexts_rdocs', 'registry_name', 'registry_id', 'submit_to',\n",
" 'submitted_to_name', 'submitted_to_done', 'webometrics_rank',\n",
" 'webometrics_size', 'webometrics_visibility', 'webometrics_rich_files',\n",
" 'webometrics_scholar', 'monthly_deposits', 'total_deposits',\n",
" 'association'],\n",
" dtype='object')"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
2021-07-23 12:38:56 +02:00
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
2021-07-22 11:35:40 +02:00
" <th>home_page</th>\n",
" <th>title</th>\n",
2021-07-23 12:38:56 +02:00
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
2021-07-22 11:35:40 +02:00
" <th>location_country</th>\n",
2021-07-23 12:38:56 +02:00
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
2021-07-22 11:35:40 +02:00
" <th>subjects</th>\n",
2021-07-23 12:38:56 +02:00
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
2021-07-22 11:35:40 +02:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2021-07-23 12:38:56 +02:00
" <th>count</th>\n",
" <td>17589</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>107</td>\n",
" <td>0</td>\n",
" <td>5375</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>86</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2245</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>265</td>\n",
" <td>5368</td>\n",
" <td>5373</td>\n",
" <td>4468</td>\n",
" <td>176</td>\n",
" <td>1521</td>\n",
" <td>115</td>\n",
" <td>3782</td>\n",
" <td>4127</td>\n",
" <td>4127</td>\n",
" <td>3676</td>\n",
" <td>5182</td>\n",
" <td>4898</td>\n",
" <td>5205</td>\n",
" <td>3774</td>\n",
" <td>3752</td>\n",
" <td>3734</td>\n",
" <td>4637</td>\n",
" <td>4671</td>\n",
" <td>5375</td>\n",
" <td>10524</td>\n",
" <td>5360</td>\n",
" <td>215</td>\n",
" <td>187</td>\n",
" <td>2291</td>\n",
" <td>2291</td>\n",
" <td>2291</td>\n",
" <td>2293</td>\n",
" <td>2291</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
" <td>7398</td>\n",
" <td>7266</td>\n",
" <td>567</td>\n",
" <td>205</td>\n",
" <td>205</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>756</td>\n",
" <td>756</td>\n",
" <td>231</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>5375</td>\n",
" <td>658</td>\n",
" <td>1</td>\n",
" <td>2135</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>5375</td>\n",
" <td>4127</td>\n",
" <td>3966</td>\n",
" <td>4158</td>\n",
" <td>12</td>\n",
" <td>107</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>51</td>\n",
" <td>3</td>\n",
" <td>68</td>\n",
" <td>3</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>5202</td>\n",
" <td>5076</td>\n",
" <td>4179</td>\n",
" <td>170</td>\n",
" <td>1468</td>\n",
" <td>111</td>\n",
" <td>3304</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>4437</td>\n",
" <td>4328</td>\n",
" <td>136</td>\n",
" <td>1875</td>\n",
" <td>2927</td>\n",
" <td>2965</td>\n",
" <td>31</td>\n",
" <td>126</td>\n",
" <td>53</td>\n",
" <td>237</td>\n",
" <td>4830</td>\n",
" <td>207</td>\n",
" <td>171</td>\n",
" <td>72</td>\n",
" <td>54</td>\n",
" <td>16</td>\n",
" <td>741</td>\n",
" <td>1704</td>\n",
" <td>135</td>\n",
" <td>118</td>\n",
" <td>134</td>\n",
" <td>117</td>\n",
" <td>3</td>\n",
" <td>5166</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>146</td>\n",
" <td>143</td>\n",
" <td>346</td>\n",
" <td>342</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>10159</td>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-26 11:15:14 +02:00
" <td>disk0/00/00/14/07</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-06 08:24:53</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
2021-07-26 11:15:14 +02:00
" <td>12637</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>bad_oai_pmh_url_0</td>\n",
" <td>duplicate_title</td>\n",
" <td>Similar title to &lt;xhtml:table xmlns:xhtml=\"htt...</td>\n",
" <td>2010-01-13 10:44:49</td>\n",
" <td>discovered</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FALSE</td>\n",
" <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>http://virtuelcampus.univ-msila.dz/fll</td>\n",
2021-07-26 11:15:14 +02:00
" <td>http://npl.csircentral.net/</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
2021-07-26 11:15:14 +02:00
" <td>https://twitter.com/rpsicomdp?lang=es</td>\n",
2021-07-23 12:38:56 +02:00
" <td>info:other:archives.eprints.org:import</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>Chinese Academy of Science (中国科学院)</td>\n",
" <td>http://www.cas.cn/</td>\n",
" <td>us</td>\n",
" <td>Lima</td>\n",
" <td>34.1607</td>\n",
" <td>-118.139</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>L1</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
2021-07-26 11:15:14 +02:00
" <td>¿Quién puede depositar documentos en el reposi...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>celestial</td>\n",
" <td>184</td>\n",
" <td>celestial</td>\n",
" <td>opendoar</td>\n",
" <td>2021-01-25</td>\n",
2021-07-26 11:15:14 +02:00
" <td>367</td>\n",
" <td>738</td>\n",
" <td>362</td>\n",
2021-07-23 12:38:56 +02:00
" <td>824</td>\n",
" <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>russell_group</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>238</td>\n",
" <td>332</td>\n",
" <td>5375</td>\n",
" <td>1333</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>16</td>\n",
" <td>3795</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>5334</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>18</td>\n",
" <td>47</td>\n",
" <td>3</td>\n",
" <td>68</td>\n",
" <td>53</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2204</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>258</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>112</td>\n",
" <td>2758</td>\n",
" <td>2652</td>\n",
" <td>2699</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>902</td>\n",
" <td>70</td>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>2307</td>\n",
" <td>840</td>\n",
" <td>4771</td>\n",
" <td>348</td>\n",
" <td>99</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>2015</td>\n",
" <td>2077</td>\n",
" <td>2213</td>\n",
" <td>733</td>\n",
" <td>95</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>3758</td>\n",
" <td>7</td>\n",
" <td>216</td>\n",
" <td>205</td>\n",
" <td>205</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>387</td>\n",
" <td>387</td>\n",
" <td>141</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
"count 17589 5375 5375 5375 0 0 \n",
"unique 5375 658 1 2135 0 0 \n",
"top 10159 11 archive 1 NaN NaN \n",
"freq 238 332 5375 1333 NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
"count 5375 5375 5375 \n",
"unique 5375 4127 3966 \n",
2021-07-26 11:15:14 +02:00
"top disk0/00/00/14/07 2010-01-06 13:43:48 2011-07-06 08:24:53 \n",
2021-07-23 12:38:56 +02:00
"freq 1 16 8 \n",
"\n",
" status_changed type succeeds commentary \\\n",
"count 5375 5375 107 0 \n",
"unique 4158 12 107 0 \n",
2021-07-26 11:15:14 +02:00
"top 2010-01-06 13:43:48 institutional 12637 NaN \n",
2021-07-23 12:38:56 +02:00
"freq 16 3795 1 NaN \n",
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
"count 5375 0 0 0 0 \n",
"unique 2 0 0 0 0 \n",
"top show NaN NaN NaN NaN \n",
"freq 5334 NaN NaN NaN NaN \n",
"\n",
" item_issues_id item_issues_type \\\n",
"count 86 86 \n",
"unique 51 3 \n",
"top bad_oai_pmh_url_0 duplicate_title \n",
"freq 18 47 \n",
"\n",
" item_issues_description \\\n",
"count 86 \n",
"unique 68 \n",
"top Similar title to <xhtml:table xmlns:xhtml=\"htt... \n",
"freq 3 \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"count 86 86 0 \n",
"unique 3 2 0 \n",
"top 2010-01-13 10:44:49 discovered NaN \n",
"freq 68 53 NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"count 0 0 2245 \n",
"unique 0 0 4 \n",
"top NaN NaN 0 \n",
"freq NaN NaN 2204 \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
"count 0 0 265 5368 \n",
"unique 0 0 2 5202 \n",
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
"freq NaN NaN 258 4 \n",
"\n",
" title oai_pmh \\\n",
"count 5373 4468 \n",
"unique 5076 4179 \n",
"top Repositorio Institucional http://virtuelcampus.univ-msila.dz/fll \n",
"freq 7 5 \n",
"\n",
2021-07-26 11:15:14 +02:00
" sword_endpoint \\\n",
"count 176 \n",
"unique 170 \n",
"top http://npl.csircentral.net/ \n",
"freq 2 \n",
2021-07-23 12:38:56 +02:00
"\n",
" rss_feed \\\n",
"count 1521 \n",
"unique 1468 \n",
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
"freq 5 \n",
"\n",
2021-07-26 11:15:14 +02:00
" twitter_feed \\\n",
"count 115 \n",
"unique 111 \n",
"top https://twitter.com/rpsicomdp?lang=es \n",
"freq 2 \n",
2021-07-23 12:38:56 +02:00
"\n",
" description fulltext open_access mandate \\\n",
"count 3782 4127 4127 3676 \n",
"unique 3304 2 2 2 \n",
"top info:other:archives.eprints.org:import TRUE TRUE FALSE \n",
"freq 112 2758 2652 2699 \n",
"\n",
" organisation_title organisation_home_page \\\n",
"count 5182 4898 \n",
"unique 4437 4328 \n",
"top Chinese Academy of Science (中国科学院) http://www.cas.cn/ \n",
"freq 9 9 \n",
"\n",
" location_country location_city location_latitude location_longitude \\\n",
"count 5205 3774 3752 3734 \n",
"unique 136 1875 2927 2965 \n",
"top us Lima 34.1607 -118.139 \n",
"freq 902 70 25 25 \n",
"\n",
" software geoname version subjects date \\\n",
"count 4637 4671 5375 10524 5360 \n",
"unique 31 126 53 237 4830 \n",
"top dspace geoname_2_US other L1 2006-05-04 10:48:14 \n",
"freq 2307 840 4771 348 99 \n",
"\n",
" note \\\n",
"count 215 \n",
"unique 207 \n",
2021-07-26 11:15:14 +02:00
"top ¿Quién puede depositar documentos en el reposi... \n",
2021-07-23 12:38:56 +02:00
"freq 2 \n",
"\n",
" suggestions activity_low \\\n",
"count 187 2291 \n",
"unique 171 72 \n",
"top This repository is hosted by the Texas Digital... 0 \n",
"freq 9 2015 \n",
"\n",
" activity_medium activity_high recordcount \\\n",
"count 2291 2291 2293 \n",
"unique 54 16 741 \n",
"top 0 0 100 \n",
"freq 2077 2213 733 \n",
"\n",
" recordhistory fulltexts_total \\\n",
"count 2291 270 \n",
"unique 1704 135 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 95 113 \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
"count 258 270 258 7398 \n",
"unique 118 134 117 3 \n",
"top 0 0 0 celestial \n",
"freq 114 113 114 3758 \n",
"\n",
" registry_id submit_to submitted_to_name submitted_to_done \\\n",
"count 7266 567 205 205 \n",
"unique 5166 3 1 1 \n",
"top 184 celestial opendoar 2021-01-25 \n",
"freq 7 216 205 205 \n",
"\n",
" webometrics_rank webometrics_size webometrics_visibility \\\n",
"count 148 148 148 \n",
"unique 148 148 148 \n",
2021-07-26 11:15:14 +02:00
"top 367 738 362 \n",
2021-07-23 12:38:56 +02:00
"freq 1 1 1 \n",
"\n",
" webometrics_rich_files webometrics_scholar \\\n",
"count 148 148 \n",
"unique 146 143 \n",
"top 824 806 \n",
"freq 3 5 \n",
"\n",
" monthly_deposits total_deposits \\\n",
"count 756 756 \n",
"unique 346 342 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 387 387 \n",
"\n",
" association \n",
"count 231 \n",
"unique 2 \n",
"top russell_group \n",
"freq 141 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.describe(include='all')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>606</td>\n",
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opendoar</td>\n",
" <td>166</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>1106</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roarmap</td>\n",
" <td>69</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source dir \\\n",
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
"3 606 NaN NaN NaN NaN NaN NaN \n",
"4 606 NaN NaN NaN NaN NaN NaN \n",
"\n",
" datestamp lastmod status_changed type \\\n",
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
"2 NaN NaN show NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"2 NaN NaN 0 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
"2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" title oai_pmh \\\n",
"2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n",
"2 NaN NaN NaN NaN TRUE TRUE \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" mandate organisation_title organisation_home_page location_country \\\n",
"2 NaN NaN NaN fr \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" location_city location_latitude location_longitude software geoname \\\n",
"2 NaN NaN NaN hal geoname_2_FR \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" version subjects date note suggestions activity_low \\\n",
"2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" activity_medium activity_high recordcount \\\n",
"2 0 2 675816 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
"2 NaN NaN NaN opendoar 166 \n",
"3 NaN NaN NaN celestial 1106 \n",
"4 NaN NaN NaN roarmap 69 \n",
"\n",
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
"2 NaN NaN NaN 1 \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"2 6 1 1 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" webometrics_scholar monthly_deposits \\\n",
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" total_deposits association \n",
"2 3063 NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df[roar_df.eprintid == '606']"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>606</td>\n",
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opendoar</td>\n",
" <td>166</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>1106</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>606</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roarmap</td>\n",
" <td>69</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source dir \\\n",
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
"3 606 NaN NaN NaN NaN NaN NaN \n",
"4 606 NaN NaN NaN NaN NaN NaN \n",
"\n",
" datestamp lastmod status_changed type \\\n",
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
"2 NaN NaN show NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"2 NaN NaN 0 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
"2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" title oai_pmh \\\n",
"2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n",
"2 NaN NaN NaN NaN TRUE TRUE \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" mandate organisation_title organisation_home_page location_country \\\n",
"2 NaN NaN NaN fr \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" location_city location_latitude location_longitude software geoname \\\n",
"2 NaN NaN NaN hal geoname_2_FR \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" version subjects date note suggestions activity_low \\\n",
"2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n",
"3 NaN NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN NaN \n",
"\n",
" activity_medium activity_high recordcount \\\n",
"2 0 2 675816 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
"2 NaN NaN NaN opendoar 166 \n",
"3 NaN NaN NaN celestial 1106 \n",
"4 NaN NaN NaN roarmap 69 \n",
"\n",
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
"2 NaN NaN NaN 1 \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"2 6 1 1 \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" webometrics_scholar monthly_deposits \\\n",
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" total_deposits association \n",
"2 3063 NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df[roar_df.eprintid == '606']"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" <tr>\n",
" <th>eprintid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>{nan, 633}</td>\n",
" <td>{nan, archive}</td>\n",
" <td>{nan, 1}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, disk0/00/00/00/01}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, 2011-07-18 05:40:07}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, subject}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, http://archivesic.ccsd.cnrs.fr/}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{@RCHIVESIC , nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, fr}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, hal}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{geoname_2_FR, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, other}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 2002-05-17 19:24:41}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 25}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{celestial, opendoar}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{58, 669}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>{nan, 511}</td>\n",
" <td>{nan, archive}</td>\n",
" <td>{nan, 1}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, disk0/00/00/00/10}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, 2011-07-18 05:40:13}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, institutional}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, http://www.diva-portal.org/mdh/}</td>\n",
" <td>{nan, Academic Archive On-line (Mälardalen Uni...</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, http://www.diva-portal.org/oai/mdh/OAI}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, TRUE}</td>\n",
" <td>{nan, TRUE}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, se}</td>\n",
" <td>{nan, Uppsala}</td>\n",
" <td>{nan, 59.8667}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, 17.6333}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, diva}</td>\n",
" <td>{nan, geoname_2_SE}</td>\n",
" <td>{nan, other}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 2005-12-08 13:15:22}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n",
" <td>{nan, 100}</td>\n",
" <td>{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1...</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{celestial, opendoar}</td>\n",
" <td>{258, 526}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1000</th>\n",
" <td>{274}</td>\n",
" <td>{archive}</td>\n",
" <td>{1}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{disk0/00/00/10/00}</td>\n",
" <td>{2010-01-06 13:45:01}</td>\n",
" <td>{2011-07-06 08:21:21}</td>\n",
" <td>{2010-01-06 13:45:01}</td>\n",
" <td>{subject}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{0}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{http://pam.pisharp.org/}</td>\n",
" <td>{PAM - Portuguese Archive of Mathematics}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{TRUE}</td>\n",
" <td>{TRUE}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{pt}</td>\n",
" <td>{Bellevue, WA}</td>\n",
" <td>{47.6034}</td>\n",
" <td>{-122.155}</td>\n",
" <td>{dspace}</td>\n",
" <td>{geoname_2_PT}</td>\n",
" <td>{other}</td>\n",
" <td>{nan}</td>\n",
" <td>{2006-05-04 10:48:14}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10001</th>\n",
" <td>{nan, 20}</td>\n",
" <td>{nan, archive}</td>\n",
" <td>{nan, 91}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, disk0/00/01/00/01}</td>\n",
" <td>{nan, 2015-08-08 14:52:11}</td>\n",
" <td>{nan, 2016-03-21 19:44:01}</td>\n",
" <td>{nan, 2015-08-08 14:52:11}</td>\n",
" <td>{nan, subject}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{http://edoc.sub.uni-hamburg.de/klimawandel/, ...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, Klimawandel Dokumentenserver}</td>\n",
" <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan, The \"Documentenserver Klimawandel\" (Repo...</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, TRUE}</td>\n",
" <td>{nan, TRUE}</td>\n",
" <td>{nan, TRUE}</td>\n",
" <td>{nan, KLIMZUG projects, Helmholtz-Zentrum Gees...</td>\n",
" <td>{http://www.climateservicecenter.de/, nan, htt...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, de}</td>\n",
" <td>{nan, Hamburg}</td>\n",
" <td>{nan, 53.5511}</td>\n",
" <td>{nan, 9.9937}</td>\n",
" <td>{nan, opus}</td>\n",
" <td>{nan, geoname_2_DE}</td>\n",
" <td>{nan, other}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{S1, HD, GF, GE, G1}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan, 2015-07-02 08:08:31}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
2021-07-26 11:15:14 +02:00
" <td>{nan, celestial, opendoar}</td>\n",
" <td>{3408, 5881, nan}</td>\n",
2021-07-23 12:38:56 +02:00
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10008</th>\n",
" <td>{11}</td>\n",
" <td>{archive}</td>\n",
" <td>{404}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{disk0/00/01/00/08}</td>\n",
" <td>{2015-08-08 14:52:26}</td>\n",
" <td>{2016-03-21 19:43:51}</td>\n",
" <td>{2015-08-08 14:52:26}</td>\n",
" <td>{institutional}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{show}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{http://creativematter.skidmore.edu/}</td>\n",
" <td>{Creative Matter | Skidmore College Research}</td>\n",
" <td>{http://creativematter.skidmore.edu/do/oai/}</td>\n",
" <td>{nan}</td>\n",
" <td>{http://creativematter.skidmore.edu/recent.rss}</td>\n",
" <td>{nan}</td>\n",
" <td>{Welcome to Creative Matter, a repository for ...</td>\n",
" <td>{TRUE}</td>\n",
" <td>{FALSE}</td>\n",
" <td>{FALSE}</td>\n",
" <td>{Skidmore College}</td>\n",
" <td>{http://www.skidmore.edu/}</td>\n",
" <td>{us}</td>\n",
" <td>{Saratoga Springs}</td>\n",
" <td>{43.0961}</td>\n",
" <td>{-73.7818}</td>\n",
" <td>{bepress}</td>\n",
" <td>{geoname_2_US}</td>\n",
" <td>{other}</td>\n",
" <td>{nan}</td>\n",
" <td>{2015-07-06 17:35:50}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{celestial}</td>\n",
" <td>{5882}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" <td>{nan}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rev_number eprint_status userid importid source \\\n",
"eprintid \n",
"1 {nan, 633} {nan, archive} {nan, 1} {nan} {nan} \n",
"10 {nan, 511} {nan, archive} {nan, 1} {nan} {nan} \n",
"1000 {274} {archive} {1} {nan} {nan} \n",
"10001 {nan, 20} {nan, archive} {nan, 91} {nan} {nan} \n",
"10008 {11} {archive} {404} {nan} {nan} \n",
"\n",
" dir datestamp \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n",
2021-07-23 12:38:56 +02:00
"10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n",
"1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n",
"10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n",
"10008 {disk0/00/01/00/08} {2015-08-08 14:52:26} \n",
"\n",
" lastmod status_changed \\\n",
"eprintid \n",
"1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n",
2021-07-26 11:15:14 +02:00
"10 {nan, 2011-07-18 05:40:13} {nan, 2010-01-06 13:43:48} \n",
2021-07-23 12:38:56 +02:00
"1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n",
"10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n",
"10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n",
"\n",
" type succeeds commentary metadata_visibility \\\n",
"eprintid \n",
"1 {nan, subject} {nan} {nan} {nan, show} \n",
"10 {nan, institutional} {nan} {nan} {nan, show} \n",
"1000 {subject} {nan} {nan} {show} \n",
"10001 {nan, subject} {nan} {nan} {nan, show} \n",
"10008 {institutional} {nan} {nan} {show} \n",
"\n",
" latitude longitude relation_type relation_uri item_issues_id \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} {nan} \n",
"\n",
" item_issues_type item_issues_description item_issues_timestamp \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
"eprintid \n",
"1 {nan} {nan, 0} {nan} {nan} \n",
"10 {nan} {nan, 0} {nan} {nan} \n",
"1000 {nan} {0} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} \n",
"\n",
" exemplar home_page \\\n",
"eprintid \n",
"1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n",
"10 {nan} {nan, http://www.diva-portal.org/mdh/} \n",
"1000 {nan} {http://pam.pisharp.org/} \n",
2021-07-26 11:15:14 +02:00
"10001 {nan} {http://edoc.sub.uni-hamburg.de/klimawandel/, ... \n",
2021-07-23 12:38:56 +02:00
"10008 {nan} {http://creativematter.skidmore.edu/} \n",
"\n",
" title \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 {@RCHIVESIC , nan} \n",
2021-07-23 12:38:56 +02:00
"10 {nan, Academic Archive On-line (Mälardalen Uni... \n",
"1000 {PAM - Portuguese Archive of Mathematics} \n",
"10001 {nan, Klimawandel Dokumentenserver} \n",
"10008 {Creative Matter | Skidmore College Research} \n",
"\n",
" oai_pmh sword_endpoint \\\n",
"eprintid \n",
"1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n",
2021-07-26 11:15:14 +02:00
"10 {nan, http://www.diva-portal.org/oai/mdh/OAI} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {nan} \n",
"10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n",
"10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n",
"\n",
" rss_feed twitter_feed \\\n",
"eprintid \n",
"1 {nan} {nan} \n",
"10 {nan} {nan} \n",
"1000 {nan} {nan} \n",
"10001 {nan} {nan} \n",
"10008 {http://creativematter.skidmore.edu/recent.rss} {nan} \n",
"\n",
" description fulltext \\\n",
"eprintid \n",
"1 {nan} {nan} \n",
2021-07-26 11:15:14 +02:00
"10 {nan} {nan, TRUE} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {TRUE} \n",
2021-07-26 11:15:14 +02:00
"10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {nan, TRUE} \n",
2021-07-23 12:38:56 +02:00
"10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n",
"\n",
" open_access mandate \\\n",
"eprintid \n",
"1 {nan} {nan} \n",
2021-07-26 11:15:14 +02:00
"10 {nan, TRUE} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {TRUE} {nan} \n",
2021-07-26 11:15:14 +02:00
"10001 {nan, TRUE} {nan, TRUE} \n",
2021-07-23 12:38:56 +02:00
"10008 {FALSE} {FALSE} \n",
"\n",
" organisation_title \\\n",
"eprintid \n",
"1 {nan} \n",
"10 {nan} \n",
"1000 {nan} \n",
2021-07-26 11:15:14 +02:00
"10001 {nan, KLIMZUG projects, Helmholtz-Zentrum Gees... \n",
2021-07-23 12:38:56 +02:00
"10008 {Skidmore College} \n",
"\n",
" organisation_home_page location_country \\\n",
"eprintid \n",
"1 {nan} {nan, fr} \n",
"10 {nan} {nan, se} \n",
"1000 {nan} {pt} \n",
2021-07-26 11:15:14 +02:00
"10001 {http://www.climateservicecenter.de/, nan, htt... {nan, de} \n",
2021-07-23 12:38:56 +02:00
"10008 {http://www.skidmore.edu/} {us} \n",
"\n",
" location_city location_latitude location_longitude \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
2021-07-26 11:15:14 +02:00
"10 {nan, Uppsala} {nan, 59.8667} {nan, 17.6333} \n",
2021-07-23 12:38:56 +02:00
"1000 {Bellevue, WA} {47.6034} {-122.155} \n",
"10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n",
"10008 {Saratoga Springs} {43.0961} {-73.7818} \n",
"\n",
" software geoname version \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 {nan, hal} {geoname_2_FR, nan} {nan, other} \n",
2021-07-23 12:38:56 +02:00
"10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n",
"1000 {dspace} {geoname_2_PT} {other} \n",
"10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n",
"10008 {bepress} {geoname_2_US} {other} \n",
"\n",
" subjects date note suggestions \\\n",
"eprintid \n",
"1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n",
"10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n",
"1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n",
2021-07-26 11:15:14 +02:00
"10001 {S1, HD, GF, GE, G1} {nan, 2015-07-02 08:08:31} {nan} {nan} \n",
2021-07-23 12:38:56 +02:00
"10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n",
"\n",
" activity_low activity_medium activity_high recordcount \\\n",
"eprintid \n",
"1 {nan, 0} {nan, 0} {nan, 0} {nan, 25} \n",
"10 {nan, 0} {nan, 0} {nan, 0} {nan, 100} \n",
"1000 {nan} {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} \n",
"\n",
" recordhistory fulltexts_total \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0... {nan} \n",
2021-07-23 12:38:56 +02:00
"10 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1... {nan} \n",
"1000 {nan} {nan} \n",
"10001 {nan} {nan} \n",
"10008 {nan} {nan} \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" registry_name registry_id submit_to \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 {celestial, opendoar} {58, 669} {nan} \n",
"10 {celestial, opendoar} {258, 526} {nan} \n",
2021-07-23 12:38:56 +02:00
"1000 {nan} {nan} {nan} \n",
2021-07-26 11:15:14 +02:00
"10001 {nan, celestial, opendoar} {3408, 5881, nan} {nan} \n",
2021-07-23 12:38:56 +02:00
"10008 {celestial} {5882} {nan} \n",
"\n",
" submitted_to_name submitted_to_done webometrics_rank \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} \n",
"\n",
" webometrics_scholar monthly_deposits total_deposits association \n",
"eprintid \n",
"1 {nan} {nan} {nan} {nan} \n",
"10 {nan} {nan} {nan} {nan} \n",
"1000 {nan} {nan} {nan} {nan} \n",
"10001 {nan} {nan} {nan} {nan} \n",
"10008 {nan} {nan} {nan} {nan} "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df = roar_df.groupby('eprintid').aggregate(set)\n",
"roar_df.head()"
]
},
{
"cell_type": "code",
2021-07-26 11:15:14 +02:00
"execution_count": 8,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" <tr>\n",
" <th>eprintid</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>633</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/01</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:07</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/</td>\n",
" <td>@RCHIVESIC</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2002-05-17 19:24:41</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[celestial, opendoar]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>[58, 669]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>511</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/10</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:13</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://www.diva-portal.org/mdh/</td>\n",
" <td>Academic Archive On-line (Mälardalen Universit...</td>\n",
" <td>http://www.diva-portal.org/oai/mdh/OAI</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>se</td>\n",
" <td>Uppsala</td>\n",
" <td>59.8667</td>\n",
" <td>17.6333</td>\n",
" <td>diva</td>\n",
" <td>geoname_2_SE</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2005-12-08 13:15:22</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[celestial, opendoar]</td>\n",
" <td>[258, 526]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1000</th>\n",
" <td>274</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/10/00</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>2011-07-06 08:21:21</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://pam.pisharp.org/</td>\n",
" <td>PAM - Portuguese Archive of Mathematics</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>pt</td>\n",
" <td>Bellevue, WA</td>\n",
" <td>47.6034</td>\n",
" <td>-122.155</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_PT</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10001</th>\n",
" <td>20</td>\n",
" <td>archive</td>\n",
" <td>91</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>2016-03-21 19:44:01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/</td>\n",
" <td>Klimawandel Dokumentenserver</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/oai</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The \"Documentenserver Klimawandel\" (Repository...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[KLIMZUG projects, Helmholtz-Zentrum Geesthach...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>[http://www.climateservicecenter.de/, http://w...</td>\n",
" <td>de</td>\n",
" <td>Hamburg</td>\n",
" <td>53.5511</td>\n",
" <td>9.9937</td>\n",
" <td>opus</td>\n",
" <td>geoname_2_DE</td>\n",
" <td>other</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[S1, GE, HD, GF, G1]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2015-07-02 08:08:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[celestial, opendoar]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>[3408, 5881]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10008</th>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>404</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/08</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>2016-03-21 19:43:51</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/</td>\n",
" <td>Creative Matter | Skidmore College Research</td>\n",
" <td>http://creativematter.skidmore.edu/do/oai/</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/recent.rss</td>\n",
" <td>NaN</td>\n",
" <td>Welcome to Creative Matter, a repository for t...</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>Skidmore College</td>\n",
" <td>http://www.skidmore.edu/</td>\n",
" <td>us</td>\n",
" <td>Saratoga Springs</td>\n",
" <td>43.0961</td>\n",
" <td>-73.7818</td>\n",
" <td>bepress</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2015-07-06 17:35:50</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>5882</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" rev_number eprint_status userid importid source dir \\\n",
"eprintid \n",
"1 633 archive 1 NaN NaN disk0/00/00/00/01 \n",
"10 511 archive 1 NaN NaN disk0/00/00/00/10 \n",
"1000 274 archive 1 NaN NaN disk0/00/00/10/00 \n",
"10001 20 archive 91 NaN NaN disk0/00/01/00/01 \n",
"10008 11 archive 404 NaN NaN disk0/00/01/00/08 \n",
"\n",
" datestamp lastmod status_changed \\\n",
"eprintid \n",
"1 2010-01-06 13:43:48 2011-07-18 05:40:07 2010-01-06 13:43:48 \n",
"10 2010-01-06 13:43:48 2011-07-18 05:40:13 2010-01-06 13:43:48 \n",
"1000 2010-01-06 13:45:01 2011-07-06 08:21:21 2010-01-06 13:45:01 \n",
"10001 2015-08-08 14:52:11 2016-03-21 19:44:01 2015-08-08 14:52:11 \n",
"10008 2015-08-08 14:52:26 2016-03-21 19:43:51 2015-08-08 14:52:26 \n",
"\n",
" type succeeds commentary metadata_visibility latitude \\\n",
"eprintid \n",
"1 subject NaN NaN show NaN \n",
"10 institutional NaN NaN show NaN \n",
"1000 subject NaN NaN show NaN \n",
"10001 subject NaN NaN show NaN \n",
"10008 institutional NaN NaN show NaN \n",
"\n",
" longitude relation_type relation_uri item_issues_id \\\n",
"eprintid \n",
"1 NaN NaN NaN NaN \n",
"10 NaN NaN NaN NaN \n",
"1000 NaN NaN NaN NaN \n",
"10001 NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN \n",
"\n",
" item_issues_type item_issues_description item_issues_timestamp \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
"eprintid \n",
"1 NaN 0 NaN NaN \n",
"10 NaN 0 NaN NaN \n",
"1000 NaN 0 NaN NaN \n",
"10001 NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN \n",
"\n",
" exemplar home_page \\\n",
"eprintid \n",
"1 NaN http://archivesic.ccsd.cnrs.fr/ \n",
"10 NaN http://www.diva-portal.org/mdh/ \n",
"1000 NaN http://pam.pisharp.org/ \n",
"10001 NaN http://edoc.sub.uni-hamburg.de/klimawandel/ \n",
"10008 NaN http://creativematter.skidmore.edu/ \n",
"\n",
" title \\\n",
"eprintid \n",
"1 @RCHIVESIC \n",
"10 Academic Archive On-line (Mälardalen Universit... \n",
"1000 PAM - Portuguese Archive of Mathematics \n",
"10001 Klimawandel Dokumentenserver \n",
"10008 Creative Matter | Skidmore College Research \n",
"\n",
" oai_pmh sword_endpoint \\\n",
"eprintid \n",
"1 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n",
"10 http://www.diva-portal.org/oai/mdh/OAI NaN \n",
"1000 NaN NaN \n",
"10001 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n",
"10008 http://creativematter.skidmore.edu/do/oai/ NaN \n",
"\n",
" rss_feed twitter_feed \\\n",
"eprintid \n",
"1 NaN NaN \n",
"10 NaN NaN \n",
"1000 NaN NaN \n",
"10001 NaN NaN \n",
"10008 http://creativematter.skidmore.edu/recent.rss NaN \n",
"\n",
" description fulltext \\\n",
"eprintid \n",
"1 NaN NaN \n",
"10 NaN TRUE \n",
"1000 NaN TRUE \n",
"10001 The \"Documentenserver Klimawandel\" (Repository... TRUE \n",
"10008 Welcome to Creative Matter, a repository for t... TRUE \n",
"\n",
" open_access mandate \\\n",
"eprintid \n",
"1 NaN NaN \n",
"10 TRUE NaN \n",
"1000 TRUE NaN \n",
"10001 TRUE TRUE \n",
"10008 FALSE FALSE \n",
"\n",
" organisation_title \\\n",
"eprintid \n",
"1 NaN \n",
"10 NaN \n",
"1000 NaN \n",
2021-07-26 11:15:14 +02:00
"10001 [KLIMZUG projects, Helmholtz-Zentrum Geesthach... \n",
2021-07-23 12:38:56 +02:00
"10008 Skidmore College \n",
"\n",
" organisation_home_page location_country \\\n",
"eprintid \n",
"1 NaN fr \n",
"10 NaN se \n",
"1000 NaN pt \n",
"10001 [http://www.climateservicecenter.de/, http://w... de \n",
"10008 http://www.skidmore.edu/ us \n",
"\n",
" location_city location_latitude location_longitude software \\\n",
"eprintid \n",
"1 NaN NaN NaN hal \n",
"10 Uppsala 59.8667 17.6333 diva \n",
"1000 Bellevue, WA 47.6034 -122.155 dspace \n",
"10001 Hamburg 53.5511 9.9937 opus \n",
"10008 Saratoga Springs 43.0961 -73.7818 bepress \n",
"\n",
" geoname version subjects date \\\n",
"eprintid \n",
"1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n",
"10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n",
"1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n",
2021-07-26 11:15:14 +02:00
"10001 geoname_2_DE other [S1, GE, HD, GF, G1] 2015-07-02 08:08:31 \n",
2021-07-23 12:38:56 +02:00
"10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n",
"\n",
" note suggestions activity_low activity_medium activity_high \\\n",
"eprintid \n",
"1 NaN NaN 0 0 0 \n",
"10 NaN NaN 0 0 0 \n",
"1000 NaN NaN NaN NaN NaN \n",
"10001 NaN NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN NaN \n",
"\n",
" recordcount recordhistory \\\n",
"eprintid \n",
"1 25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
"10 100 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... \n",
"1000 NaN NaN \n",
"10001 NaN NaN \n",
"10008 NaN NaN \n",
"\n",
" fulltexts_total fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
"eprintid \n",
"1 NaN NaN NaN NaN \n",
"10 NaN NaN NaN NaN \n",
"1000 NaN NaN NaN NaN \n",
"10001 NaN NaN NaN NaN \n",
"10008 NaN NaN NaN NaN \n",
"\n",
" registry_name registry_id submit_to submitted_to_name \\\n",
"eprintid \n",
2021-07-26 11:15:14 +02:00
"1 [celestial, opendoar] [58, 669] NaN NaN \n",
"10 [celestial, opendoar] [258, 526] NaN NaN \n",
2021-07-23 12:38:56 +02:00
"1000 NaN NaN NaN NaN \n",
2021-07-26 11:15:14 +02:00
"10001 [celestial, opendoar] [3408, 5881] NaN NaN \n",
2021-07-23 12:38:56 +02:00
"10008 celestial 5882 NaN NaN \n",
"\n",
" submitted_to_done webometrics_rank webometrics_size \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN \n",
"\n",
" monthly_deposits total_deposits association \n",
"eprintid \n",
"1 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"1000 NaN NaN NaN \n",
"10001 NaN NaN NaN \n",
"10008 NaN NaN NaN "
]
},
2021-07-26 11:15:14 +02:00
"execution_count": 8,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def value_or_list(cell_set):\n",
"    \"\"\"Collapse a collection of aggregated cell values into a scalar or a list.\n",
"\n",
"    NaN entries are dropped first, then:\n",
"    - nothing left         -> np.nan\n",
"    - exactly one value    -> that value\n",
"    - several values       -> a list of them in a deterministic order\n",
"\n",
"    Anything that is not a set/frozenset/list/tuple (e.g. a string or NaN left\n",
"    behind by a previous run of this cell) is returned unchanged, so applying\n",
"    the function twice neither splits strings into characters nor raises.\n",
"    \"\"\"\n",
"    if not isinstance(cell_set, (set, frozenset, list, tuple)):\n",
"        return cell_set\n",
"    # NaN != NaN, so test every float explicitly: set.discard(np.nan) only\n",
"    # removes the np.nan object itself and misses any other NaN instance.\n",
"    values = {v for v in cell_set if not (isinstance(v, float) and v != v)}\n",
"    if len(values) == 0:\n",
"        return np.nan\n",
"    if len(values) == 1:\n",
"        return values.pop()\n",
"    # Set iteration order is not stable across kernels for strings; sort so the\n",
"    # stored output is reproducible from one run to the next.\n",
"    try:\n",
"        return sorted(values)\n",
"    except TypeError:\n",
"        # Mixed, non-comparable types: fall back to ordering by text form.\n",
"        return sorted(values, key=str)\n",
" \n",
"# Run value_or_list over every cell of the frame, then preview the first rows.\n",
"roar_df = roar_df.applymap(func=value_or_list)\n",
"roar_df.head(n=5)"
]
},
{
"cell_type": "code",
2021-07-26 11:15:14 +02:00
"execution_count": 9,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4188</th>\n",
" <td>606</td>\n",
" <td>657</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/06/06</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>2016-04-17 21:53:14</td>\n",
" <td>2010-01-06 13:44:31</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://hal.archives-ouvertes.fr/</td>\n",
" <td>HAL: Hyper Article en Ligne</td>\n",
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>1998-11-02 11:53:57</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>675816</td>\n",
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[celestial, roarmap, opendoar]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>[69, 166, 1106]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>6</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>3063</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
"4188 606 657 archive 1 NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
"4188 disk0/00/00/06/06 2010-01-06 13:44:31 2016-04-17 21:53:14 \n",
"\n",
" status_changed type succeeds commentary metadata_visibility \\\n",
"4188 2010-01-06 13:44:31 subject NaN NaN show \n",
"\n",
" latitude longitude relation_type relation_uri item_issues_id \\\n",
"4188 NaN NaN NaN NaN NaN \n",
"\n",
" item_issues_type item_issues_description item_issues_timestamp \\\n",
"4188 NaN NaN NaN \n",
"\n",
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
"4188 NaN NaN NaN \n",
"\n",
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
"4188 NaN 0 NaN NaN \n",
"\n",
" exemplar home_page title \\\n",
"4188 NaN http://hal.archives-ouvertes.fr/ HAL: Hyper Article en Ligne \n",
"\n",
" oai_pmh sword_endpoint rss_feed \\\n",
"4188 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n",
"\n",
" twitter_feed description fulltext open_access mandate organisation_title \\\n",
"4188 NaN NaN TRUE TRUE NaN NaN \n",
"\n",
" organisation_home_page location_country location_city location_latitude \\\n",
"4188 NaN fr NaN NaN \n",
"\n",
" location_longitude software geoname version subjects \\\n",
"4188 NaN hal geoname_2_FR other NaN \n",
"\n",
" date note suggestions activity_low activity_medium \\\n",
"4188 1998-11-02 11:53:57 NaN NaN 0 0 \n",
"\n",
" activity_high recordcount \\\n",
"4188 2 675816 \n",
"\n",
" recordhistory fulltexts_total \\\n",
"4188 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
"4188 NaN NaN NaN \n",
"\n",
" registry_name registry_id submit_to \\\n",
2021-07-26 11:15:14 +02:00
"4188 [celestial, roarmap, opendoar] [69, 166, 1106] NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n",
"4188 NaN NaN 1 6 \n",
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"4188 1 1 1 \n",
"\n",
" monthly_deposits total_deposits \\\n",
"4188 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 3063 \n",
"\n",
" association \n",
"4188 NaN "
]
},
2021-07-26 11:15:14 +02:00
"execution_count": 9,
2021-07-23 12:38:56 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Promote the eprintid index to a regular column. The guard keeps the cell\n",
"# idempotent: an unconditional reset_index would add a spurious 'index'\n",
"# column every time the cell is re-run.\n",
"if 'eprintid' not in roar_df.columns:\n",
"    roar_df = roar_df.reset_index()\n",
"\n",
"# Spot-check a single record; eprintid values are compared as strings here.\n",
"roar_df[roar_df['eprintid'] == '606']"
]
},
{
"cell_type": "code",
2021-07-26 11:15:14 +02:00
"execution_count": 10,
2021-07-23 12:38:56 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>5375</td>\n",
" <td>107</td>\n",
" <td>0.0</td>\n",
" <td>5375</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>63</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2245</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>265</td>\n",
" <td>5368</td>\n",
" <td>5373</td>\n",
" <td>4267</td>\n",
" <td>176</td>\n",
" <td>1521</td>\n",
" <td>115</td>\n",
" <td>3782</td>\n",
" <td>4127</td>\n",
" <td>4127</td>\n",
" <td>3676</td>\n",
" <td>4396</td>\n",
" <td>4226</td>\n",
" <td>5080</td>\n",
" <td>3655</td>\n",
" <td>3681</td>\n",
" <td>3664</td>\n",
" <td>4637</td>\n",
" <td>4671</td>\n",
" <td>5375</td>\n",
" <td>1250</td>\n",
" <td>5360</td>\n",
" <td>215</td>\n",
" <td>187</td>\n",
" <td>2291</td>\n",
" <td>2291</td>\n",
" <td>2291</td>\n",
" <td>2293</td>\n",
" <td>2291</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
" <td>270</td>\n",
" <td>258</td>\n",
" <td>4603</td>\n",
" <td>4578</td>\n",
" <td>293</td>\n",
" <td>205</td>\n",
" <td>205</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>756</td>\n",
" <td>756</td>\n",
" <td>217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>5375</td>\n",
" <td>658</td>\n",
" <td>1</td>\n",
" <td>2135</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5375</td>\n",
" <td>4127</td>\n",
" <td>3966</td>\n",
" <td>4158</td>\n",
" <td>12</td>\n",
" <td>107</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>48</td>\n",
" <td>5</td>\n",
" <td>62</td>\n",
" <td>4</td>\n",
" <td>3</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>4</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>5202</td>\n",
" <td>5076</td>\n",
" <td>3994</td>\n",
" <td>170</td>\n",
" <td>1468</td>\n",
" <td>111</td>\n",
" <td>3304</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>3802</td>\n",
2021-07-26 11:15:14 +02:00
" <td>3771</td>\n",
2021-07-23 12:38:56 +02:00
" <td>143</td>\n",
" <td>1861</td>\n",
" <td>2887</td>\n",
" <td>2917</td>\n",
" <td>31</td>\n",
" <td>126</td>\n",
" <td>53</td>\n",
" <td>906</td>\n",
" <td>4830</td>\n",
" <td>207</td>\n",
" <td>171</td>\n",
" <td>72</td>\n",
" <td>54</td>\n",
" <td>16</td>\n",
" <td>741</td>\n",
" <td>1704</td>\n",
" <td>135</td>\n",
" <td>118</td>\n",
" <td>134</td>\n",
" <td>117</td>\n",
" <td>7</td>\n",
2021-07-26 11:15:14 +02:00
" <td>4257</td>\n",
2021-07-23 12:38:56 +02:00
" <td>7</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>148</td>\n",
" <td>146</td>\n",
" <td>143</td>\n",
" <td>346</td>\n",
" <td>342</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
2021-07-26 11:15:14 +02:00
" <td>1259</td>\n",
2021-07-23 12:38:56 +02:00
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-26 11:15:14 +02:00
" <td>disk0/00/00/14/07</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
2021-07-26 11:15:14 +02:00
" <td>2016-05-02 05:43:04</td>\n",
2021-07-23 12:38:56 +02:00
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
2021-07-26 11:15:14 +02:00
" <td>12637</td>\n",
2021-07-23 12:38:56 +02:00
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>bad_oai_pmh_url_0</td>\n",
" <td>duplicate_title</td>\n",
" <td>Duplicate title to &lt;xhtml:table xmlns:xhtml=\"h...</td>\n",
" <td>2010-01-13 10:44:49</td>\n",
" <td>discovered</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FALSE</td>\n",
" <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>http://kce.docressources.info/ws/PMBWs_2</td>\n",
2021-07-26 11:15:14 +02:00
" <td>http://npl.csircentral.net/</td>\n",
2021-07-23 12:38:56 +02:00
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
" <td>http://twitter.com/bu_ufsc</td>\n",
" <td>info:other:archives.eprints.org:import</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>Chinese Academy of Science (中国科学院)</td>\n",
" <td>http://www.cas.cn/</td>\n",
" <td>us</td>\n",
" <td>Lima</td>\n",
" <td>34.1607</td>\n",
" <td>-118.139</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>K1</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
2021-07-26 11:15:14 +02:00
" <td>DSpace@Işık is a growing collection of Işık Un...</td>\n",
2021-07-23 12:38:56 +02:00
" <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
2021-07-26 11:15:14 +02:00
" <td>[celestial, opendoar]</td>\n",
" <td>[2246, 1879]</td>\n",
" <td>[celestial, roarmap, opendoar]</td>\n",
2021-07-23 12:38:56 +02:00
" <td>opendoar</td>\n",
" <td>2021-01-25</td>\n",
2021-07-26 11:15:14 +02:00
" <td>367</td>\n",
" <td>738</td>\n",
" <td>668</td>\n",
2021-07-23 12:38:56 +02:00
" <td>824</td>\n",
" <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>0</td>\n",
" <td>russell_group</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>1</td>\n",
" <td>332</td>\n",
" <td>5375</td>\n",
" <td>1333</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>8</td>\n",
" <td>16</td>\n",
" <td>3795</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>5334</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>15</td>\n",
" <td>33</td>\n",
" <td>2</td>\n",
" <td>45</td>\n",
" <td>38</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2204</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>258</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>4</td>\n",
" <td>2</td>\n",
" <td>5</td>\n",
" <td>2</td>\n",
" <td>112</td>\n",
" <td>2758</td>\n",
" <td>2652</td>\n",
" <td>2699</td>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>886</td>\n",
" <td>69</td>\n",
" <td>25</td>\n",
" <td>25</td>\n",
" <td>2307</td>\n",
" <td>840</td>\n",
" <td>4771</td>\n",
" <td>53</td>\n",
" <td>99</td>\n",
" <td>2</td>\n",
" <td>9</td>\n",
" <td>2015</td>\n",
" <td>2077</td>\n",
" <td>2213</td>\n",
" <td>733</td>\n",
" <td>95</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>113</td>\n",
" <td>114</td>\n",
" <td>2114</td>\n",
" <td>4</td>\n",
" <td>92</td>\n",
" <td>205</td>\n",
" <td>205</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>387</td>\n",
" <td>387</td>\n",
" <td>127</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
2021-07-23 12:38:56 +02:00
" <th>max</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
2021-07-22 11:35:40 +02:00
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
2021-07-23 12:38:56 +02:00
" eprintid rev_number eprint_status userid importid source \\\n",
"count 5375 5375 5375 5375 0.0 0.0 \n",
"unique 5375 658 1 2135 NaN NaN \n",
2021-07-26 11:15:14 +02:00
"top 1259 11 archive 1 NaN NaN \n",
2021-07-23 12:38:56 +02:00
"freq 1 332 5375 1333 NaN NaN \n",
"mean NaN NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
"count 5375 5375 5375 \n",
"unique 5375 4127 3966 \n",
2021-07-26 11:15:14 +02:00
"top disk0/00/00/14/07 2010-01-06 13:43:48 2016-05-02 05:43:04 \n",
2021-07-23 12:38:56 +02:00
"freq 1 16 8 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" status_changed type succeeds commentary \\\n",
"count 5375 5375 107 0.0 \n",
"unique 4158 12 107 NaN \n",
2021-07-26 11:15:14 +02:00
"top 2010-01-06 13:43:48 institutional 12637 NaN \n",
2021-07-23 12:38:56 +02:00
"freq 16 3795 1 NaN \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
"count 5375 0.0 0.0 0.0 0.0 \n",
"unique 2 NaN NaN NaN NaN \n",
"top show NaN NaN NaN NaN \n",
"freq 5334 NaN NaN NaN NaN \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" item_issues_id item_issues_type \\\n",
"count 63 63 \n",
"unique 48 5 \n",
"top bad_oai_pmh_url_0 duplicate_title \n",
"freq 15 33 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" item_issues_description \\\n",
"count 63 \n",
"unique 62 \n",
"top Duplicate title to <xhtml:table xmlns:xhtml=\"h... \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"count 63 63 0.0 \n",
"unique 4 3 NaN \n",
"top 2010-01-13 10:44:49 discovered NaN \n",
"freq 45 38 NaN \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"count 0.0 0.0 2245 \n",
"unique NaN NaN 4 \n",
"top NaN NaN 0 \n",
"freq NaN NaN 2204 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar home_page \\\n",
"count 0.0 0.0 265 5368 \n",
"unique NaN NaN 2 5202 \n",
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
"freq NaN NaN 258 4 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" title oai_pmh \\\n",
"count 5373 4267 \n",
"unique 5076 3994 \n",
"top Repositorio Institucional http://kce.docressources.info/ws/PMBWs_2 \n",
"freq 7 4 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
2021-07-26 11:15:14 +02:00
" sword_endpoint \\\n",
"count 176 \n",
"unique 170 \n",
"top http://npl.csircentral.net/ \n",
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" rss_feed \\\n",
"count 1521 \n",
"unique 1468 \n",
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
"freq 5 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" twitter_feed description \\\n",
"count 115 3782 \n",
"unique 111 3304 \n",
"top http://twitter.com/bu_ufsc info:other:archives.eprints.org:import \n",
"freq 2 112 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" fulltext open_access mandate organisation_title \\\n",
"count 4127 4127 3676 4396 \n",
"unique 2 2 2 3802 \n",
"top TRUE TRUE FALSE Chinese Academy of Science (中国科学院) \n",
"freq 2758 2652 2699 9 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
" organisation_home_page location_country location_city \\\n",
"count 4226 5080 3655 \n",
2021-07-26 11:15:14 +02:00
"unique 3771 143 1861 \n",
2021-07-23 12:38:56 +02:00
"top http://www.cas.cn/ us Lima \n",
"freq 9 886 69 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" location_latitude location_longitude software geoname version \\\n",
"count 3681 3664 4637 4671 5375 \n",
"unique 2887 2917 31 126 53 \n",
"top 34.1607 -118.139 dspace geoname_2_US other \n",
"freq 25 25 2307 840 4771 \n",
"mean NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN NaN \n",
"\n",
" subjects date \\\n",
"count 1250 5360 \n",
"unique 906 4830 \n",
"top K1 2006-05-04 10:48:14 \n",
"freq 53 99 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" note \\\n",
"count 215 \n",
"unique 207 \n",
2021-07-26 11:15:14 +02:00
"top DSpace@Işık is a growing collection of Işık Un... \n",
2021-07-23 12:38:56 +02:00
"freq 2 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN \n",
"\n",
" suggestions activity_low \\\n",
"count 187 2291 \n",
"unique 171 72 \n",
"top This repository is hosted by the Texas Digital... 0 \n",
"freq 9 2015 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" activity_medium activity_high recordcount \\\n",
"count 2291 2291 2293 \n",
"unique 54 16 741 \n",
"top 0 0 100 \n",
"freq 2077 2213 733 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"count 2291 270 \n",
"unique 1704 135 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 95 113 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
"count 258 270 258 4603 \n",
"unique 118 134 117 7 \n",
2021-07-26 11:15:14 +02:00
"top 0 0 0 [celestial, opendoar] \n",
2021-07-23 12:38:56 +02:00
"freq 114 113 114 2114 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
"min NaN NaN NaN NaN \n",
"25% NaN NaN NaN NaN \n",
"50% NaN NaN NaN NaN \n",
"75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n",
"\n",
2021-07-26 11:15:14 +02:00
" registry_id submit_to submitted_to_name \\\n",
"count 4578 293 205 \n",
"unique 4257 7 1 \n",
"top [2246, 1879] [celestial, roarmap, opendoar] opendoar \n",
"freq 4 92 205 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
2021-07-23 12:38:56 +02:00
"\n",
" submitted_to_done webometrics_rank webometrics_size \\\n",
"count 205 148 148 \n",
"unique 1 148 148 \n",
2021-07-26 11:15:14 +02:00
"top 2021-01-25 367 738 \n",
2021-07-23 12:38:56 +02:00
"freq 205 1 1 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"count 148 148 148 \n",
"unique 148 146 143 \n",
2021-07-26 11:15:14 +02:00
"top 668 824 806 \n",
2021-07-23 12:38:56 +02:00
"freq 1 3 5 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
"min NaN NaN NaN \n",
"25% NaN NaN NaN \n",
"50% NaN NaN NaN \n",
"75% NaN NaN NaN \n",
"max NaN NaN NaN \n",
"\n",
" monthly_deposits total_deposits \\\n",
"count 756 756 \n",
"unique 346 342 \n",
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
"freq 387 387 \n",
"mean NaN NaN \n",
"std NaN NaN \n",
"min NaN NaN \n",
"25% NaN NaN \n",
"50% NaN NaN \n",
"75% NaN NaN \n",
"max NaN NaN \n",
"\n",
" association \n",
"count 217 \n",
"unique 3 \n",
"top russell_group \n",
"freq 127 \n",
"mean NaN \n",
"std NaN \n",
"min NaN \n",
"25% NaN \n",
"50% NaN \n",
"75% NaN \n",
"max NaN "
2021-07-22 11:35:40 +02:00
]
},
2021-07-26 11:15:14 +02:00
"execution_count": 10,
2021-07-22 11:35:40 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.describe(include='all')"
]
},
{
"cell_type": "code",
2021-07-26 11:15:14 +02:00
"execution_count": 11,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-07-23 12:38:56 +02:00
"eprintid 0\n",
"rev_number 0\n",
"eprint_status 0\n",
"userid 0\n",
"importid 5375\n",
"source 5375\n",
"dir 0\n",
"datestamp 0\n",
"lastmod 0\n",
"status_changed 0\n",
"type 0\n",
"succeeds 5268\n",
"commentary 5375\n",
"metadata_visibility 0\n",
"latitude 5375\n",
"longitude 5375\n",
"relation_type 5375\n",
"relation_uri 5375\n",
"item_issues_id 5312\n",
"item_issues_type 5312\n",
"item_issues_description 5312\n",
"item_issues_timestamp 5312\n",
"item_issues_status 5312\n",
"item_issues_reported_by 5375\n",
"item_issues_resolved_by 5375\n",
"item_issues_comment 5375\n",
"item_issues_count 3130\n",
"sword_depositor 5375\n",
"sword_slug 5375\n",
"exemplar 5110\n",
"home_page 7\n",
"title 2\n",
"oai_pmh 1108\n",
"sword_endpoint 5199\n",
"rss_feed 3854\n",
"twitter_feed 5260\n",
"description 1593\n",
"fulltext 1248\n",
"open_access 1248\n",
"mandate 1699\n",
"dtype: int64"
2021-07-22 11:35:40 +02:00
]
},
2021-07-26 11:15:14 +02:00
"execution_count": 11,
2021-07-22 11:35:40 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-07-23 12:38:56 +02:00
"roar_df.isna().sum()[:40]"
2021-07-22 11:35:40 +02:00
]
},
{
"cell_type": "code",
2021-07-26 11:15:14 +02:00
"execution_count": 12,
2021-07-22 11:35:40 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2021-07-23 12:38:56 +02:00
"organisation_title 979\n",
"organisation_home_page 1149\n",
"location_country 295\n",
"location_city 1720\n",
"location_latitude 1694\n",
"location_longitude 1711\n",
"software 738\n",
"geoname 704\n",
"version 0\n",
"subjects 4125\n",
"date 15\n",
"note 5160\n",
"suggestions 5188\n",
"activity_low 3084\n",
"activity_medium 3084\n",
"activity_high 3084\n",
"recordcount 3082\n",
"recordhistory 3084\n",
"fulltexts_total 5105\n",
"fulltexts_docs 5117\n",
"fulltexts_rtotal 5105\n",
"fulltexts_rdocs 5117\n",
"registry_name 772\n",
"registry_id 797\n",
"submit_to 5082\n",
"submitted_to_name 5170\n",
"submitted_to_done 5170\n",
"webometrics_rank 5227\n",
"webometrics_size 5227\n",
"webometrics_visibility 5227\n",
"webometrics_rich_files 5227\n",
"webometrics_scholar 5227\n",
"monthly_deposits 4619\n",
"total_deposits 4619\n",
"association 5158\n",
"dtype: int64"
2021-07-22 11:35:40 +02:00
]
},
2021-07-26 11:15:14 +02:00
"execution_count": 12,
2021-07-22 11:35:40 +02:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2021-07-23 12:38:56 +02:00
"roar_df.isna().sum()[40:]"
2021-07-22 11:35:40 +02:00
]
2021-07-23 12:38:56 +02:00
},
2021-07-26 11:15:14 +02:00
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'TRUE', 'FALSE'], dtype=object)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.open_access.unique()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['subject', 'institutional', 'researchdata', 'theses', 'database',\n",
" 'other', 'journal', 'opendata', 'demonstration', 'multi',\n",
" 'learning', 'webobservatory'], dtype=object)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.type.unique()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'TRUE', 'FALSE'], dtype=object)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.mandate.unique()"
]
},
2021-07-23 12:38:56 +02:00
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
2021-07-22 11:35:40 +02:00
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}