5146 lines
212 KiB
Plaintext
5146 lines
212 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import ast\n",
|
|
"import csv\n",
|
|
"import json\n",
|
|
"\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"import requests\n",
|
|
"\n",
|
|
"pd.set_option('display.max_columns', None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Loading datasets"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>921</td>\n",
|
|
" <td>517</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/09/21</td>\n",
|
|
" <td>2010-01-06 13:44:55</td>\n",
|
|
" <td>2016-04-17 21:54:11</td>\n",
|
|
" <td>2010-01-06 13:44:55</td>\n",
|
|
" <td>theses</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>http://alcme.oclc.org/ndltd/index.html</td>\n",
|
|
" <td>Networked Digital Library of Theses and Disser...</td>\n",
|
|
" <td>http://alcme.oclc.org/ndltd/servlet/OAIHandler</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>us</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>etddb</td>\n",
|
|
" <td>geoname_2_US</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2006-04-18</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1533400</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>1489</td>\n",
|
|
" <td>556</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/14/89</td>\n",
|
|
" <td>2010-01-06 13:46:05</td>\n",
|
|
" <td>2016-04-17 21:49:00</td>\n",
|
|
" <td>2010-01-06 13:46:05</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://prensahistorica.mcu.es/prensahistorica/...</td>\n",
|
|
" <td>Virtual Library of Historical Press</td>\n",
|
|
" <td>http://prensahistorica.mcu.es/prensahistorica/...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>The Virtual Library of Historical Press is the...</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>es</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>geoname_2_ES</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2006-04-04 00:00:00</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>1008486</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>858</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>47500</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>657</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/06/06</td>\n",
|
|
" <td>2010-01-06 13:44:31</td>\n",
|
|
" <td>2016-04-17 21:53:14</td>\n",
|
|
" <td>2010-01-06 13:44:31</td>\n",
|
|
" <td>subject</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://hal.archives-ouvertes.fr/</td>\n",
|
|
" <td>HAL: Hyper Article en Ligne</td>\n",
|
|
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>fr</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>hal</td>\n",
|
|
" <td>geoname_2_FR</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1998-11-02 11:53:57</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>675816</td>\n",
|
|
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>opendoar</td>\n",
|
|
" <td>166</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>3063</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>1106</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>roarmap</td>\n",
|
|
" <td>69</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" eprintid rev_number eprint_status userid importid source dir \\\n",
|
|
"0 921 517 archive 1 NaN NaN disk0/00/00/09/21 \n",
|
|
"1 1489 556 archive 1 NaN NaN disk0/00/00/14/89 \n",
|
|
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
|
|
"3 606 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 606 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" datestamp lastmod status_changed type \\\n",
|
|
"0 2010-01-06 13:44:55 2016-04-17 21:54:11 2010-01-06 13:44:55 theses \n",
|
|
"1 2010-01-06 13:46:05 2016-04-17 21:49:00 2010-01-06 13:46:05 other \n",
|
|
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
|
|
"0 NaN NaN show NaN NaN NaN \n",
|
|
"1 NaN NaN show NaN NaN NaN \n",
|
|
"2 NaN NaN show NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
|
|
"0 NaN NaN NaN NaN \n",
|
|
"1 NaN NaN NaN NaN \n",
|
|
"2 NaN NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
|
|
"0 NaN NaN NaN \n",
|
|
"1 NaN NaN NaN \n",
|
|
"2 NaN NaN NaN \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
|
|
"0 NaN NaN 0 \n",
|
|
"1 NaN NaN 0 \n",
|
|
"2 NaN NaN 0 \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" sword_depositor sword_slug exemplar \\\n",
|
|
"0 NaN NaN FALSE \n",
|
|
"1 NaN NaN NaN \n",
|
|
"2 NaN NaN NaN \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" home_page \\\n",
|
|
"0 http://alcme.oclc.org/ndltd/index.html \n",
|
|
"1 http://prensahistorica.mcu.es/prensahistorica/... \n",
|
|
"2 http://hal.archives-ouvertes.fr/ \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" title \\\n",
|
|
"0 Networked Digital Library of Theses and Disser... \n",
|
|
"1 Virtual Library of Historical Press \n",
|
|
"2 HAL: Hyper Article en Ligne \n",
|
|
"3 NaN \n",
|
|
"4 NaN \n",
|
|
"\n",
|
|
" oai_pmh sword_endpoint rss_feed \\\n",
|
|
"0 http://alcme.oclc.org/ndltd/servlet/OAIHandler NaN NaN \n",
|
|
"1 http://prensahistorica.mcu.es/prensahistorica/... NaN NaN \n",
|
|
"2 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" twitter_feed description fulltext \\\n",
|
|
"0 NaN NaN TRUE \n",
|
|
"1 NaN The Virtual Library of Historical Press is the... TRUE \n",
|
|
"2 NaN NaN TRUE \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" open_access mandate organisation_title organisation_home_page \\\n",
|
|
"0 TRUE NaN NaN NaN \n",
|
|
"1 TRUE NaN NaN NaN \n",
|
|
"2 TRUE NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" location_country location_city location_latitude location_longitude \\\n",
|
|
"0 us NaN NaN NaN \n",
|
|
"1 es NaN NaN NaN \n",
|
|
"2 fr NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" software geoname version subjects date note \\\n",
|
|
"0 etddb geoname_2_US other NaN 2006-04-18 NaN \n",
|
|
"1 other geoname_2_ES other NaN 2006-04-04 00:00:00 NaN \n",
|
|
"2 hal geoname_2_FR other NaN 1998-11-02 11:53:57 NaN \n",
|
|
"3 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" suggestions activity_low activity_medium activity_high recordcount \\\n",
|
|
"0 NaN 0 0 0 1533400 \n",
|
|
"1 NaN 0 0 0 1008486 \n",
|
|
"2 NaN 0 0 2 675816 \n",
|
|
"3 NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" recordhistory fulltexts_total \\\n",
|
|
"0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5... NaN \n",
|
|
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761... NaN \n",
|
|
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"\n",
|
|
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
|
|
"0 NaN NaN NaN celestial 5 \n",
|
|
"1 NaN NaN NaN celestial 858 \n",
|
|
"2 NaN NaN NaN opendoar 166 \n",
|
|
"3 NaN NaN NaN celestial 1106 \n",
|
|
"4 NaN NaN NaN roarmap 69 \n",
|
|
"\n",
|
|
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
|
|
"0 NaN NaN NaN NaN \n",
|
|
"1 NaN NaN NaN NaN \n",
|
|
"2 NaN NaN NaN 1 \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
|
|
"0 NaN NaN NaN \n",
|
|
"1 NaN NaN NaN \n",
|
|
"2 6 1 1 \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" webometrics_scholar monthly_deposits \\\n",
|
|
"0 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
|
|
"1 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
|
|
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"\n",
|
|
" total_deposits association \n",
|
|
"0 0 NaN \n",
|
|
"1 47500 NaN \n",
|
|
"2 3063 NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN "
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n",
|
|
"roar_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Index(['eprintid', 'rev_number', 'eprint_status', 'userid', 'importid',\n",
|
|
" 'source', 'dir', 'datestamp', 'lastmod', 'status_changed', 'type',\n",
|
|
" 'succeeds', 'commentary', 'metadata_visibility', 'latitude',\n",
|
|
" 'longitude', 'relation_type', 'relation_uri', 'item_issues_id',\n",
|
|
" 'item_issues_type', 'item_issues_description', 'item_issues_timestamp',\n",
|
|
" 'item_issues_status', 'item_issues_reported_by',\n",
|
|
" 'item_issues_resolved_by', 'item_issues_comment', 'item_issues_count',\n",
|
|
" 'sword_depositor', 'sword_slug', 'exemplar', 'home_page', 'title',\n",
|
|
" 'oai_pmh', 'sword_endpoint', 'rss_feed', 'twitter_feed', 'description',\n",
|
|
" 'fulltext', 'open_access', 'mandate', 'organisation_title',\n",
|
|
" 'organisation_home_page', 'location_country', 'location_city',\n",
|
|
" 'location_latitude', 'location_longitude', 'software', 'geoname',\n",
|
|
" 'version', 'subjects', 'date', 'note', 'suggestions', 'activity_low',\n",
|
|
" 'activity_medium', 'activity_high', 'recordcount', 'recordhistory',\n",
|
|
" 'fulltexts_total', 'fulltexts_docs', 'fulltexts_rtotal',\n",
|
|
" 'fulltexts_rdocs', 'registry_name', 'registry_id', 'submit_to',\n",
|
|
" 'submitted_to_name', 'submitted_to_done', 'webometrics_rank',\n",
|
|
" 'webometrics_size', 'webometrics_visibility', 'webometrics_rich_files',\n",
|
|
" 'webometrics_scholar', 'monthly_deposits', 'total_deposits',\n",
|
|
" 'association'],\n",
|
|
" dtype='object')"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"roar_df.columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>18079</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>86</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2242</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>268</td>\n",
|
|
" <td>5437</td>\n",
|
|
" <td>5442</td>\n",
|
|
" <td>4567</td>\n",
|
|
" <td>178</td>\n",
|
|
" <td>1538</td>\n",
|
|
" <td>116</td>\n",
|
|
" <td>3837</td>\n",
|
|
" <td>4197</td>\n",
|
|
" <td>4197</td>\n",
|
|
" <td>3746</td>\n",
|
|
" <td>5253</td>\n",
|
|
" <td>4965</td>\n",
|
|
" <td>5268</td>\n",
|
|
" <td>3839</td>\n",
|
|
" <td>3798</td>\n",
|
|
" <td>3780</td>\n",
|
|
" <td>4700</td>\n",
|
|
" <td>4730</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>10941</td>\n",
|
|
" <td>5429</td>\n",
|
|
" <td>218</td>\n",
|
|
" <td>189</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>2290</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>270</td>\n",
|
|
" <td>258</td>\n",
|
|
" <td>270</td>\n",
|
|
" <td>258</td>\n",
|
|
" <td>7393</td>\n",
|
|
" <td>7258</td>\n",
|
|
" <td>730</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>756</td>\n",
|
|
" <td>756</td>\n",
|
|
" <td>237</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>unique</th>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>660</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2189</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>4198</td>\n",
|
|
" <td>4043</td>\n",
|
|
" <td>4230</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>51</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>68</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>5271</td>\n",
|
|
" <td>5143</td>\n",
|
|
" <td>4278</td>\n",
|
|
" <td>172</td>\n",
|
|
" <td>1485</td>\n",
|
|
" <td>112</td>\n",
|
|
" <td>3359</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>4498</td>\n",
|
|
" <td>4395</td>\n",
|
|
" <td>136</td>\n",
|
|
" <td>1896</td>\n",
|
|
" <td>2965</td>\n",
|
|
" <td>3003</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>126</td>\n",
|
|
" <td>53</td>\n",
|
|
" <td>237</td>\n",
|
|
" <td>4898</td>\n",
|
|
" <td>210</td>\n",
|
|
" <td>173</td>\n",
|
|
" <td>72</td>\n",
|
|
" <td>54</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>741</td>\n",
|
|
" <td>1702</td>\n",
|
|
" <td>135</td>\n",
|
|
" <td>118</td>\n",
|
|
" <td>134</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5165</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>146</td>\n",
|
|
" <td>143</td>\n",
|
|
" <td>346</td>\n",
|
|
" <td>342</td>\n",
|
|
" <td>2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>top</th>\n",
|
|
" <td>12118</td>\n",
|
|
" <td>11</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/09/21</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>2016-04-17 21:55:19</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>institutional</td>\n",
|
|
" <td>3164</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>bad_oai_pmh_url_0</td>\n",
|
|
" <td>duplicate_title</td>\n",
|
|
" <td>Similar title to <xhtml:table xmlns:xhtml=\"htt...</td>\n",
|
|
" <td>2010-01-13 10:44:49</td>\n",
|
|
" <td>discovered</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>http://eprints.upnjatim.ac.id/</td>\n",
|
|
" <td>Repositorio Institucional</td>\n",
|
|
" <td>http://virtuelcampus.univ-msila.dz/fll</td>\n",
|
|
" <td>http://producao.usp.br/sword/servicedocument</td>\n",
|
|
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
|
|
" <td>http://twitter.com/bu_ufsc</td>\n",
|
|
" <td>info:other:archives.eprints.org:import</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>Chinese Academy of Science (中国科学院)</td>\n",
|
|
" <td>http://www.cas.cn/</td>\n",
|
|
" <td>us</td>\n",
|
|
" <td>Lima</td>\n",
|
|
" <td>34.1607</td>\n",
|
|
" <td>-118.139</td>\n",
|
|
" <td>dspace</td>\n",
|
|
" <td>geoname_2_US</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>L1</td>\n",
|
|
" <td>2006-05-04 10:48:14</td>\n",
|
|
" <td>¿Quién puede depositar documentos en el reposi...</td>\n",
|
|
" <td>This repository is hosted by the Texas Digital...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>184</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>opendoar</td>\n",
|
|
" <td>2021-01-25</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>824</td>\n",
|
|
" <td>806</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>russell_group</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>freq</th>\n",
|
|
" <td>238</td>\n",
|
|
" <td>333</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>1330</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>3853</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>5402</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>18</td>\n",
|
|
" <td>47</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>68</td>\n",
|
|
" <td>53</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2201</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>261</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>7</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>112</td>\n",
|
|
" <td>2805</td>\n",
|
|
" <td>2696</td>\n",
|
|
" <td>2748</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>907</td>\n",
|
|
" <td>76</td>\n",
|
|
" <td>25</td>\n",
|
|
" <td>25</td>\n",
|
|
" <td>2341</td>\n",
|
|
" <td>845</td>\n",
|
|
" <td>4841</td>\n",
|
|
" <td>362</td>\n",
|
|
" <td>99</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>2012</td>\n",
|
|
" <td>2074</td>\n",
|
|
" <td>2210</td>\n",
|
|
" <td>730</td>\n",
|
|
" <td>95</td>\n",
|
|
" <td>113</td>\n",
|
|
" <td>114</td>\n",
|
|
" <td>113</td>\n",
|
|
" <td>114</td>\n",
|
|
" <td>3741</td>\n",
|
|
" <td>7</td>\n",
|
|
" <td>283</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>387</td>\n",
|
|
" <td>387</td>\n",
|
|
" <td>144</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" eprintid rev_number eprint_status userid importid source \\\n",
|
|
"count 18079 5444 5444 5444 0 0 \n",
|
|
"unique 5444 660 1 2189 0 0 \n",
|
|
"top 12118 11 archive 1 NaN NaN \n",
|
|
"freq 238 333 5444 1330 NaN NaN \n",
|
|
"\n",
|
|
" dir datestamp lastmod \\\n",
|
|
"count 5444 5444 5444 \n",
|
|
"unique 5444 4198 4043 \n",
|
|
"top disk0/00/00/09/21 2010-01-06 13:43:48 2016-04-17 21:55:19 \n",
|
|
"freq 1 16 8 \n",
|
|
"\n",
|
|
" status_changed type succeeds commentary \\\n",
|
|
"count 5444 5444 108 0 \n",
|
|
"unique 4230 12 108 0 \n",
|
|
"top 2010-01-06 13:43:48 institutional 3164 NaN \n",
|
|
"freq 16 3853 1 NaN \n",
|
|
"\n",
|
|
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
|
|
"count 5444 0 0 0 0 \n",
|
|
"unique 2 0 0 0 0 \n",
|
|
"top show NaN NaN NaN NaN \n",
|
|
"freq 5402 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_id item_issues_type \\\n",
|
|
"count 86 86 \n",
|
|
"unique 51 3 \n",
|
|
"top bad_oai_pmh_url_0 duplicate_title \n",
|
|
"freq 18 47 \n",
|
|
"\n",
|
|
" item_issues_description \\\n",
|
|
"count 86 \n",
|
|
"unique 68 \n",
|
|
"top Similar title to <xhtml:table xmlns:xhtml=\"htt... \n",
|
|
"freq 3 \n",
|
|
"\n",
|
|
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
|
|
"count 86 86 0 \n",
|
|
"unique 3 2 0 \n",
|
|
"top 2010-01-13 10:44:49 discovered NaN \n",
|
|
"freq 68 53 NaN \n",
|
|
"\n",
|
|
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
|
|
"count 0 0 2242 \n",
|
|
"unique 0 0 4 \n",
|
|
"top NaN NaN 0 \n",
|
|
"freq NaN NaN 2201 \n",
|
|
"\n",
|
|
" sword_depositor sword_slug exemplar home_page \\\n",
|
|
"count 0 0 268 5437 \n",
|
|
"unique 0 0 2 5271 \n",
|
|
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
|
|
"freq NaN NaN 261 4 \n",
|
|
"\n",
|
|
" title oai_pmh \\\n",
|
|
"count 5442 4567 \n",
|
|
"unique 5143 4278 \n",
|
|
"top Repositorio Institucional http://virtuelcampus.univ-msila.dz/fll \n",
|
|
"freq 7 5 \n",
|
|
"\n",
|
|
" sword_endpoint \\\n",
|
|
"count 178 \n",
|
|
"unique 172 \n",
|
|
"top http://producao.usp.br/sword/servicedocument \n",
|
|
"freq 2 \n",
|
|
"\n",
|
|
" rss_feed \\\n",
|
|
"count 1538 \n",
|
|
"unique 1485 \n",
|
|
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
|
|
"freq 5 \n",
|
|
"\n",
|
|
" twitter_feed description \\\n",
|
|
"count 116 3837 \n",
|
|
"unique 112 3359 \n",
|
|
"top http://twitter.com/bu_ufsc info:other:archives.eprints.org:import \n",
|
|
"freq 2 112 \n",
|
|
"\n",
|
|
" fulltext open_access mandate organisation_title \\\n",
|
|
"count 4197 4197 3746 5253 \n",
|
|
"unique 2 2 2 4498 \n",
|
|
"top TRUE TRUE FALSE Chinese Academy of Science (中国科学院) \n",
|
|
"freq 2805 2696 2748 9 \n",
|
|
"\n",
|
|
" organisation_home_page location_country location_city \\\n",
|
|
"count 4965 5268 3839 \n",
|
|
"unique 4395 136 1896 \n",
|
|
"top http://www.cas.cn/ us Lima \n",
|
|
"freq 9 907 76 \n",
|
|
"\n",
|
|
" location_latitude location_longitude software geoname version \\\n",
|
|
"count 3798 3780 4700 4730 5444 \n",
|
|
"unique 2965 3003 31 126 53 \n",
|
|
"top 34.1607 -118.139 dspace geoname_2_US other \n",
|
|
"freq 25 25 2341 845 4841 \n",
|
|
"\n",
|
|
" subjects date \\\n",
|
|
"count 10941 5429 \n",
|
|
"unique 237 4898 \n",
|
|
"top L1 2006-05-04 10:48:14 \n",
|
|
"freq 362 99 \n",
|
|
"\n",
|
|
" note \\\n",
|
|
"count 218 \n",
|
|
"unique 210 \n",
|
|
"top ¿Quién puede depositar documentos en el reposi... \n",
|
|
"freq 2 \n",
|
|
"\n",
|
|
" suggestions activity_low \\\n",
|
|
"count 189 2288 \n",
|
|
"unique 173 72 \n",
|
|
"top This repository is hosted by the Texas Digital... 0 \n",
|
|
"freq 9 2012 \n",
|
|
"\n",
|
|
" activity_medium activity_high recordcount \\\n",
|
|
"count 2288 2288 2290 \n",
|
|
"unique 54 16 741 \n",
|
|
"top 0 0 100 \n",
|
|
"freq 2074 2210 730 \n",
|
|
"\n",
|
|
" recordhistory fulltexts_total \\\n",
|
|
"count 2288 270 \n",
|
|
"unique 1702 135 \n",
|
|
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
|
|
"freq 95 113 \n",
|
|
"\n",
|
|
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
|
|
"count 258 270 258 7393 \n",
|
|
"unique 118 134 117 3 \n",
|
|
"top 0 0 0 celestial \n",
|
|
"freq 114 113 114 3741 \n",
|
|
"\n",
|
|
" registry_id submit_to submitted_to_name submitted_to_done \\\n",
|
|
"count 7258 730 205 205 \n",
|
|
"unique 5165 3 1 1 \n",
|
|
"top 184 celestial opendoar 2021-01-25 \n",
|
|
"freq 7 283 205 205 \n",
|
|
"\n",
|
|
" webometrics_rank webometrics_size webometrics_visibility \\\n",
|
|
"count 148 148 148 \n",
|
|
"unique 148 148 148 \n",
|
|
"top 1 6 1 \n",
|
|
"freq 1 1 1 \n",
|
|
"\n",
|
|
" webometrics_rich_files webometrics_scholar \\\n",
|
|
"count 148 148 \n",
|
|
"unique 146 143 \n",
|
|
"top 824 806 \n",
|
|
"freq 3 5 \n",
|
|
"\n",
|
|
" monthly_deposits total_deposits \\\n",
|
|
"count 756 756 \n",
|
|
"unique 346 342 \n",
|
|
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
|
|
"freq 387 387 \n",
|
|
"\n",
|
|
" association \n",
|
|
"count 237 \n",
|
|
"unique 2 \n",
|
|
"top russell_group \n",
|
|
"freq 144 "
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"roar_df.describe(include=\"all\")  # summary stats for every column, not only numeric ones"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>657</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/06/06</td>\n",
|
|
" <td>2010-01-06 13:44:31</td>\n",
|
|
" <td>2016-04-17 21:53:14</td>\n",
|
|
" <td>2010-01-06 13:44:31</td>\n",
|
|
" <td>subject</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://hal.archives-ouvertes.fr/</td>\n",
|
|
" <td>HAL: Hyper Article en Ligne</td>\n",
|
|
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>fr</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>hal</td>\n",
|
|
" <td>geoname_2_FR</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1998-11-02 11:53:57</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>675816</td>\n",
|
|
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>opendoar</td>\n",
|
|
" <td>166</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>3063</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>1106</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>roarmap</td>\n",
|
|
" <td>69</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" eprintid rev_number eprint_status userid importid source dir \\\n",
|
|
"2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n",
|
|
"3 606 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 606 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" datestamp lastmod status_changed type \\\n",
|
|
"2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" succeeds commentary metadata_visibility latitude longitude relation_type \\\n",
|
|
"2 NaN NaN show NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" relation_uri item_issues_id item_issues_type item_issues_description \\\n",
|
|
"2 NaN NaN NaN NaN \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
|
|
"2 NaN NaN NaN \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
|
|
"2 NaN NaN 0 \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" sword_depositor sword_slug exemplar home_page \\\n",
|
|
"2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" title oai_pmh \\\n",
|
|
"2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"\n",
|
|
" sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n",
|
|
"2 NaN NaN NaN NaN TRUE TRUE \n",
|
|
"3 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" mandate organisation_title organisation_home_page location_country \\\n",
|
|
"2 NaN NaN NaN fr \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" location_city location_latitude location_longitude software geoname \\\n",
|
|
"2 NaN NaN NaN hal geoname_2_FR \n",
|
|
"3 NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" version subjects date note suggestions activity_low \\\n",
|
|
"2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n",
|
|
"3 NaN NaN NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" activity_medium activity_high recordcount \\\n",
|
|
"2 0 2 675816 \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" recordhistory fulltexts_total \\\n",
|
|
"2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"\n",
|
|
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n",
|
|
"2 NaN NaN NaN opendoar 166 \n",
|
|
"3 NaN NaN NaN celestial 1106 \n",
|
|
"4 NaN NaN NaN roarmap 69 \n",
|
|
"\n",
|
|
" submit_to submitted_to_name submitted_to_done webometrics_rank \\\n",
|
|
"2 NaN NaN NaN 1 \n",
|
|
"3 NaN NaN NaN NaN \n",
|
|
"4 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
|
|
"2 6 1 1 \n",
|
|
"3 NaN NaN NaN \n",
|
|
"4 NaN NaN NaN \n",
|
|
"\n",
|
|
" webometrics_scholar monthly_deposits \\\n",
|
|
"2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN \n",
|
|
"\n",
|
|
" total_deposits association \n",
|
|
"2 3063 NaN \n",
|
|
"3 NaN NaN \n",
|
|
"4 NaN NaN "
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"roar_df.loc[roar_df['eprintid'] == '606']  # every row stored for eprint 606 (ids are compared as strings)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>{633, nan}</td>\n",
|
|
" <td>{nan, archive}</td>\n",
|
|
" <td>{nan, 1}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, disk0/00/00/00/01}</td>\n",
|
|
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
|
|
" <td>{nan, 2011-07-18 05:40:07}</td>\n",
|
|
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
|
|
" <td>{nan, subject}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{show, nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, http://archivesic.ccsd.cnrs.fr/}</td>\n",
|
|
" <td>{nan, @RCHIVESIC }</td>\n",
|
|
" <td>{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, fr}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, hal}</td>\n",
|
|
" <td>{nan, geoname_2_FR}</td>\n",
|
|
" <td>{nan, other}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, 2002-05-17 19:24:41}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan, 25}</td>\n",
|
|
" <td>{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{celestial, opendoar}</td>\n",
|
|
" <td>{669, 58}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>{nan, 511}</td>\n",
|
|
" <td>{nan, archive}</td>\n",
|
|
" <td>{nan, 1}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, disk0/00/00/00/10}</td>\n",
|
|
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
|
|
" <td>{nan, 2011-07-18 05:40:13}</td>\n",
|
|
" <td>{nan, 2010-01-06 13:43:48}</td>\n",
|
|
" <td>{nan, institutional}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{show, nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, http://www.diva-portal.org/mdh/}</td>\n",
|
|
" <td>{nan, Academic Archive On-line (Mälardalen Uni...</td>\n",
|
|
" <td>{nan, http://www.diva-portal.org/oai/mdh/OAI}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, TRUE}</td>\n",
|
|
" <td>{nan, TRUE}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, se}</td>\n",
|
|
" <td>{nan, Uppsala}</td>\n",
|
|
" <td>{nan, 59.8667}</td>\n",
|
|
" <td>{17.6333, nan}</td>\n",
|
|
" <td>{nan, diva}</td>\n",
|
|
" <td>{nan, geoname_2_SE}</td>\n",
|
|
" <td>{nan, other}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, 2005-12-08 13:15:22}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan, 0}</td>\n",
|
|
" <td>{nan, 100}</td>\n",
|
|
" <td>{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1...</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{celestial, opendoar}</td>\n",
|
|
" <td>{258, 526}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1000</th>\n",
|
|
" <td>{274}</td>\n",
|
|
" <td>{archive}</td>\n",
|
|
" <td>{1}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{disk0/00/00/10/00}</td>\n",
|
|
" <td>{2010-01-06 13:45:01}</td>\n",
|
|
" <td>{2011-07-06 08:21:21}</td>\n",
|
|
" <td>{2010-01-06 13:45:01}</td>\n",
|
|
" <td>{subject}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{show}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{0}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{http://pam.pisharp.org/}</td>\n",
|
|
" <td>{PAM - Portuguese Archive of Mathematics}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{TRUE}</td>\n",
|
|
" <td>{TRUE}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{pt}</td>\n",
|
|
" <td>{Bellevue, WA}</td>\n",
|
|
" <td>{47.6034}</td>\n",
|
|
" <td>{-122.155}</td>\n",
|
|
" <td>{dspace}</td>\n",
|
|
" <td>{geoname_2_PT}</td>\n",
|
|
" <td>{other}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{2006-05-04 10:48:14}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10001</th>\n",
|
|
" <td>{nan, 20}</td>\n",
|
|
" <td>{nan, archive}</td>\n",
|
|
" <td>{nan, 91}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, disk0/00/01/00/01}</td>\n",
|
|
" <td>{nan, 2015-08-08 14:52:11}</td>\n",
|
|
" <td>{nan, 2016-03-21 19:44:01}</td>\n",
|
|
" <td>{nan, 2015-08-08 14:52:11}</td>\n",
|
|
" <td>{nan, subject}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{show, nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n",
|
|
" <td>{nan, Klimawandel Dokumentenserver}</td>\n",
|
|
" <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, The \"Documentenserver Klimawandel\" (Repo...</td>\n",
|
|
" <td>{nan, TRUE}</td>\n",
|
|
" <td>{nan, TRUE}</td>\n",
|
|
" <td>{nan, TRUE}</td>\n",
|
|
" <td>{nan, Helmholtz-Zentrum Geesthacht, KLIMZUG pr...</td>\n",
|
|
" <td>{http://www.klimzug.de/de/94.php, nan, http://...</td>\n",
|
|
" <td>{nan, de}</td>\n",
|
|
" <td>{nan, Hamburg}</td>\n",
|
|
" <td>{nan, 53.5511}</td>\n",
|
|
" <td>{nan, 9.9937}</td>\n",
|
|
" <td>{nan, opus}</td>\n",
|
|
" <td>{nan, geoname_2_DE}</td>\n",
|
|
" <td>{nan, other}</td>\n",
|
|
" <td>{S1, G1, GF, GE, HD}</td>\n",
|
|
" <td>{nan, 2015-07-02 08:08:31}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan, celestial, opendoar}</td>\n",
|
|
" <td>{nan, 5881, 3408}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10008</th>\n",
|
|
" <td>{11}</td>\n",
|
|
" <td>{archive}</td>\n",
|
|
" <td>{404}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{disk0/00/01/00/08}</td>\n",
|
|
" <td>{2015-08-08 14:52:26}</td>\n",
|
|
" <td>{2016-03-21 19:43:51}</td>\n",
|
|
" <td>{2015-08-08 14:52:26}</td>\n",
|
|
" <td>{institutional}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{show}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{http://creativematter.skidmore.edu/}</td>\n",
|
|
" <td>{Creative Matter | Skidmore College Research}</td>\n",
|
|
" <td>{http://creativematter.skidmore.edu/do/oai/}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{http://creativematter.skidmore.edu/recent.rss}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{Welcome to Creative Matter, a repository for ...</td>\n",
|
|
" <td>{TRUE}</td>\n",
|
|
" <td>{FALSE}</td>\n",
|
|
" <td>{FALSE}</td>\n",
|
|
" <td>{Skidmore College}</td>\n",
|
|
" <td>{http://www.skidmore.edu/}</td>\n",
|
|
" <td>{us}</td>\n",
|
|
" <td>{Saratoga Springs}</td>\n",
|
|
" <td>{43.0961}</td>\n",
|
|
" <td>{-73.7818}</td>\n",
|
|
" <td>{bepress}</td>\n",
|
|
" <td>{geoname_2_US}</td>\n",
|
|
" <td>{other}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{2015-07-06 17:35:50}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{celestial}</td>\n",
|
|
" <td>{5882}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" <td>{nan}</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" rev_number eprint_status userid importid source \\\n",
|
|
"eprintid \n",
|
|
"1 {633, nan} {nan, archive} {nan, 1} {nan} {nan} \n",
|
|
"10 {nan, 511} {nan, archive} {nan, 1} {nan} {nan} \n",
|
|
"1000 {274} {archive} {1} {nan} {nan} \n",
|
|
"10001 {nan, 20} {nan, archive} {nan, 91} {nan} {nan} \n",
|
|
"10008 {11} {archive} {404} {nan} {nan} \n",
|
|
"\n",
|
|
" dir datestamp \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n",
|
|
"10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n",
|
|
"1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n",
|
|
"10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n",
|
|
"10008 {disk0/00/01/00/08} {2015-08-08 14:52:26} \n",
|
|
"\n",
|
|
" lastmod status_changed \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n",
|
|
"10 {nan, 2011-07-18 05:40:13} {nan, 2010-01-06 13:43:48} \n",
|
|
"1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n",
|
|
"10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n",
|
|
"10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n",
|
|
"\n",
|
|
" type succeeds commentary metadata_visibility \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, subject} {nan} {nan} {show, nan} \n",
|
|
"10 {nan, institutional} {nan} {nan} {show, nan} \n",
|
|
"1000 {subject} {nan} {nan} {show} \n",
|
|
"10001 {nan, subject} {nan} {nan} {show, nan} \n",
|
|
"10008 {institutional} {nan} {nan} {show} \n",
|
|
"\n",
|
|
" latitude longitude relation_type relation_uri item_issues_id \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" item_issues_type item_issues_description item_issues_timestamp \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan, 0} {nan} {nan} \n",
|
|
"10 {nan} {nan, 0} {nan} {nan} \n",
|
|
"1000 {nan} {0} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" exemplar home_page \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n",
|
|
"10 {nan} {nan, http://www.diva-portal.org/mdh/} \n",
|
|
"1000 {nan} {http://pam.pisharp.org/} \n",
|
|
"10001 {nan} {nan, http://edoc.sub.uni-hamburg.de/klimawand... \n",
|
|
"10008 {nan} {http://creativematter.skidmore.edu/} \n",
|
|
"\n",
|
|
" title \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, @RCHIVESIC } \n",
|
|
"10 {nan, Academic Archive On-line (Mälardalen Uni... \n",
|
|
"1000 {PAM - Portuguese Archive of Mathematics} \n",
|
|
"10001 {nan, Klimawandel Dokumentenserver} \n",
|
|
"10008 {Creative Matter | Skidmore College Research} \n",
|
|
"\n",
|
|
" oai_pmh sword_endpoint \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n",
|
|
"10 {nan, http://www.diva-portal.org/oai/mdh/OAI} {nan} \n",
|
|
"1000 {nan} {nan} \n",
|
|
"10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n",
|
|
"10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n",
|
|
"\n",
|
|
" rss_feed twitter_feed \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} \n",
|
|
"10 {nan} {nan} \n",
|
|
"1000 {nan} {nan} \n",
|
|
"10001 {nan} {nan} \n",
|
|
"10008 {http://creativematter.skidmore.edu/recent.rss} {nan} \n",
|
|
"\n",
|
|
" description fulltext \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} \n",
|
|
"10 {nan} {nan, TRUE} \n",
|
|
"1000 {nan} {TRUE} \n",
|
|
"10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {nan, TRUE} \n",
|
|
"10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n",
|
|
"\n",
|
|
" open_access mandate \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} \n",
|
|
"10 {nan, TRUE} {nan} \n",
|
|
"1000 {TRUE} {nan} \n",
|
|
"10001 {nan, TRUE} {nan, TRUE} \n",
|
|
"10008 {FALSE} {FALSE} \n",
|
|
"\n",
|
|
" organisation_title \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} \n",
|
|
"10 {nan} \n",
|
|
"1000 {nan} \n",
|
|
"10001 {nan, Helmholtz-Zentrum Geesthacht, KLIMZUG pr... \n",
|
|
"10008 {Skidmore College} \n",
|
|
"\n",
|
|
" organisation_home_page location_country \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan, fr} \n",
|
|
"10 {nan} {nan, se} \n",
|
|
"1000 {nan} {pt} \n",
|
|
"10001 {http://www.klimzug.de/de/94.php, nan, http://... {nan, de} \n",
|
|
"10008 {http://www.skidmore.edu/} {us} \n",
|
|
"\n",
|
|
" location_city location_latitude location_longitude \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} \n",
|
|
"10 {nan, Uppsala} {nan, 59.8667} {17.6333, nan} \n",
|
|
"1000 {Bellevue, WA} {47.6034} {-122.155} \n",
|
|
"10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n",
|
|
"10008 {Saratoga Springs} {43.0961} {-73.7818} \n",
|
|
"\n",
|
|
" software geoname version \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, hal} {nan, geoname_2_FR} {nan, other} \n",
|
|
"10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n",
|
|
"1000 {dspace} {geoname_2_PT} {other} \n",
|
|
"10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n",
|
|
"10008 {bepress} {geoname_2_US} {other} \n",
|
|
"\n",
|
|
" subjects date note suggestions \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n",
|
|
"10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n",
|
|
"1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n",
|
|
"10001 {S1, G1, GF, GE, HD} {nan, 2015-07-02 08:08:31} {nan} {nan} \n",
|
|
"10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n",
|
|
"\n",
|
|
" activity_low activity_medium activity_high recordcount \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, 0} {nan, 0} {nan, 0} {nan, 25} \n",
|
|
"10 {nan, 0} {nan, 0} {nan, 0} {nan, 100} \n",
|
|
"1000 {nan} {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" recordhistory fulltexts_total \\\n",
|
|
"eprintid \n",
|
|
"1 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... {nan} \n",
|
|
"10 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1... {nan} \n",
|
|
"1000 {nan} {nan} \n",
|
|
"10001 {nan} {nan} \n",
|
|
"10008 {nan} {nan} \n",
|
|
"\n",
|
|
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" registry_name registry_id submit_to \\\n",
|
|
"eprintid \n",
|
|
"1 {celestial, opendoar} {669, 58} {nan} \n",
|
|
"10 {celestial, opendoar} {258, 526} {nan} \n",
|
|
"1000 {nan} {nan} {nan} \n",
|
|
"10001 {nan, celestial, opendoar} {nan, 5881, 3408} {nan} \n",
|
|
"10008 {celestial} {5882} {nan} \n",
|
|
"\n",
|
|
" submitted_to_name submitted_to_done webometrics_rank \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" webometrics_size webometrics_visibility webometrics_rich_files \\\n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} \n",
|
|
"\n",
|
|
" webometrics_scholar monthly_deposits total_deposits association \n",
|
|
"eprintid \n",
|
|
"1 {nan} {nan} {nan} {nan} \n",
|
|
"10 {nan} {nan} {nan} {nan} \n",
|
|
"1000 {nan} {nan} {nan} {nan} \n",
|
|
"10001 {nan} {nan} {nan} {nan} \n",
|
|
"10008 {nan} {nan} {nan} {nan} "
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Merge all rows that share an eprintid; every cell becomes the set of values seen in that column.\n",
|
|
"# NOTE: each column is collected independently, so row-wise pairing between columns is not retained.\n",
|
|
"roar_df = roar_df.groupby(by='eprintid').agg(set)\n",
|
|
"roar_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>633</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/00/01</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>2011-07-18 05:40:07</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>subject</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://archivesic.ccsd.cnrs.fr/</td>\n",
|
|
" <td>@RCHIVESIC</td>\n",
|
|
" <td>http://archivesic.ccsd.cnrs.fr/oai/oai.php</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>fr</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>hal</td>\n",
|
|
" <td>geoname_2_FR</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2002-05-17 19:24:41</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>25</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[celestial, opendoar]</td>\n",
|
|
" <td>[669, 58]</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>511</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/00/10</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>2011-07-18 05:40:13</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>institutional</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://www.diva-portal.org/mdh/</td>\n",
|
|
" <td>Academic Archive On-line (Mälardalen Universit...</td>\n",
|
|
" <td>http://www.diva-portal.org/oai/mdh/OAI</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>se</td>\n",
|
|
" <td>Uppsala</td>\n",
|
|
" <td>59.8667</td>\n",
|
|
" <td>17.6333</td>\n",
|
|
" <td>diva</td>\n",
|
|
" <td>geoname_2_SE</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2005-12-08 13:15:22</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[celestial, opendoar]</td>\n",
|
|
" <td>[258, 526]</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1000</th>\n",
|
|
" <td>274</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/10/00</td>\n",
|
|
" <td>2010-01-06 13:45:01</td>\n",
|
|
" <td>2011-07-06 08:21:21</td>\n",
|
|
" <td>2010-01-06 13:45:01</td>\n",
|
|
" <td>subject</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://pam.pisharp.org/</td>\n",
|
|
" <td>PAM - Portuguese Archive of Mathematics</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>pt</td>\n",
|
|
" <td>Bellevue, WA</td>\n",
|
|
" <td>47.6034</td>\n",
|
|
" <td>-122.155</td>\n",
|
|
" <td>dspace</td>\n",
|
|
" <td>geoname_2_PT</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2006-05-04 10:48:14</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10001</th>\n",
|
|
" <td>20</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>91</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/01/00/01</td>\n",
|
|
" <td>2015-08-08 14:52:11</td>\n",
|
|
" <td>2016-03-21 19:44:01</td>\n",
|
|
" <td>2015-08-08 14:52:11</td>\n",
|
|
" <td>subject</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/</td>\n",
|
|
" <td>Klimawandel Dokumentenserver</td>\n",
|
|
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/oai</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>The \"Documentenserver Klimawandel\" (Repository...</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>[Helmholtz-Zentrum Geesthacht, KLIMZUG project...</td>\n",
|
|
" <td>[http://www.klimzug.de/de/94.php, http://www.h...</td>\n",
|
|
" <td>de</td>\n",
|
|
" <td>Hamburg</td>\n",
|
|
" <td>53.5511</td>\n",
|
|
" <td>9.9937</td>\n",
|
|
" <td>opus</td>\n",
|
|
" <td>geoname_2_DE</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>[S1, GF, HD, GE, G1]</td>\n",
|
|
" <td>2015-07-02 08:08:31</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[celestial, opendoar]</td>\n",
|
|
" <td>[5881, 3408]</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10008</th>\n",
|
|
" <td>11</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>404</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/01/00/08</td>\n",
|
|
" <td>2015-08-08 14:52:26</td>\n",
|
|
" <td>2016-03-21 19:43:51</td>\n",
|
|
" <td>2015-08-08 14:52:26</td>\n",
|
|
" <td>institutional</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://creativematter.skidmore.edu/</td>\n",
|
|
" <td>Creative Matter | Skidmore College Research</td>\n",
|
|
" <td>http://creativematter.skidmore.edu/do/oai/</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://creativematter.skidmore.edu/recent.rss</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>Welcome to Creative Matter, a repository for t...</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>Skidmore College</td>\n",
|
|
" <td>http://www.skidmore.edu/</td>\n",
|
|
" <td>us</td>\n",
|
|
" <td>Saratoga Springs</td>\n",
|
|
" <td>43.0961</td>\n",
|
|
" <td>-73.7818</td>\n",
|
|
" <td>bepress</td>\n",
|
|
" <td>geoname_2_US</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2015-07-06 17:35:50</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>celestial</td>\n",
|
|
" <td>5882</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" rev_number eprint_status userid importid source dir \\\n",
|
|
"eprintid \n",
|
|
"1 633 archive 1 NaN NaN disk0/00/00/00/01 \n",
|
|
"10 511 archive 1 NaN NaN disk0/00/00/00/10 \n",
|
|
"1000 274 archive 1 NaN NaN disk0/00/00/10/00 \n",
|
|
"10001 20 archive 91 NaN NaN disk0/00/01/00/01 \n",
|
|
"10008 11 archive 404 NaN NaN disk0/00/01/00/08 \n",
|
|
"\n",
|
|
" datestamp lastmod status_changed \\\n",
|
|
"eprintid \n",
|
|
"1 2010-01-06 13:43:48 2011-07-18 05:40:07 2010-01-06 13:43:48 \n",
|
|
"10 2010-01-06 13:43:48 2011-07-18 05:40:13 2010-01-06 13:43:48 \n",
|
|
"1000 2010-01-06 13:45:01 2011-07-06 08:21:21 2010-01-06 13:45:01 \n",
|
|
"10001 2015-08-08 14:52:11 2016-03-21 19:44:01 2015-08-08 14:52:11 \n",
|
|
"10008 2015-08-08 14:52:26 2016-03-21 19:43:51 2015-08-08 14:52:26 \n",
|
|
"\n",
|
|
" type succeeds commentary metadata_visibility latitude \\\n",
|
|
"eprintid \n",
|
|
"1 subject NaN NaN show NaN \n",
|
|
"10 institutional NaN NaN show NaN \n",
|
|
"1000 subject NaN NaN show NaN \n",
|
|
"10001 subject NaN NaN show NaN \n",
|
|
"10008 institutional NaN NaN show NaN \n",
|
|
"\n",
|
|
" longitude relation_type relation_uri item_issues_id \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN NaN \n",
|
|
"10 NaN NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_type item_issues_description item_issues_timestamp \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN \n",
|
|
"10 NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN \n",
|
|
"10 NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
|
|
"eprintid \n",
|
|
"1 NaN 0 NaN NaN \n",
|
|
"10 NaN 0 NaN NaN \n",
|
|
"1000 NaN 0 NaN NaN \n",
|
|
"10001 NaN NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" exemplar home_page \\\n",
|
|
"eprintid \n",
|
|
"1 NaN http://archivesic.ccsd.cnrs.fr/ \n",
|
|
"10 NaN http://www.diva-portal.org/mdh/ \n",
|
|
"1000 NaN http://pam.pisharp.org/ \n",
|
|
"10001 NaN http://edoc.sub.uni-hamburg.de/klimawandel/ \n",
|
|
"10008 NaN http://creativematter.skidmore.edu/ \n",
|
|
"\n",
|
|
" title \\\n",
|
|
"eprintid \n",
|
|
"1 @RCHIVESIC \n",
|
|
"10 Academic Archive On-line (Mälardalen Universit... \n",
|
|
"1000 PAM - Portuguese Archive of Mathematics \n",
|
|
"10001 Klimawandel Dokumentenserver \n",
|
|
"10008 Creative Matter | Skidmore College Research \n",
|
|
"\n",
|
|
" oai_pmh sword_endpoint \\\n",
|
|
"eprintid \n",
|
|
"1 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n",
|
|
"10 http://www.diva-portal.org/oai/mdh/OAI NaN \n",
|
|
"1000 NaN NaN \n",
|
|
"10001 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n",
|
|
"10008 http://creativematter.skidmore.edu/do/oai/ NaN \n",
|
|
"\n",
|
|
" rss_feed twitter_feed \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN \n",
|
|
"10 NaN NaN \n",
|
|
"1000 NaN NaN \n",
|
|
"10001 NaN NaN \n",
|
|
"10008 http://creativematter.skidmore.edu/recent.rss NaN \n",
|
|
"\n",
|
|
" description fulltext \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN \n",
|
|
"10 NaN TRUE \n",
|
|
"1000 NaN TRUE \n",
|
|
"10001 The \"Documentenserver Klimawandel\" (Repository... TRUE \n",
|
|
"10008 Welcome to Creative Matter, a repository for t... TRUE \n",
|
|
"\n",
|
|
" open_access mandate \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN \n",
|
|
"10 TRUE NaN \n",
|
|
"1000 TRUE NaN \n",
|
|
"10001 TRUE TRUE \n",
|
|
"10008 FALSE FALSE \n",
|
|
"\n",
|
|
" organisation_title \\\n",
|
|
"eprintid \n",
|
|
"1 NaN \n",
|
|
"10 NaN \n",
|
|
"1000 NaN \n",
|
|
"10001 [Helmholtz-Zentrum Geesthacht, KLIMZUG project... \n",
|
|
"10008 Skidmore College \n",
|
|
"\n",
|
|
" organisation_home_page location_country \\\n",
|
|
"eprintid \n",
|
|
"1 NaN fr \n",
|
|
"10 NaN se \n",
|
|
"1000 NaN pt \n",
|
|
"10001 [http://www.klimzug.de/de/94.php, http://www.h... de \n",
|
|
"10008 http://www.skidmore.edu/ us \n",
|
|
"\n",
|
|
" location_city location_latitude location_longitude software \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN hal \n",
|
|
"10 Uppsala 59.8667 17.6333 diva \n",
|
|
"1000 Bellevue, WA 47.6034 -122.155 dspace \n",
|
|
"10001 Hamburg 53.5511 9.9937 opus \n",
|
|
"10008 Saratoga Springs 43.0961 -73.7818 bepress \n",
|
|
"\n",
|
|
" geoname version subjects date \\\n",
|
|
"eprintid \n",
|
|
"1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n",
|
|
"10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n",
|
|
"1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n",
|
|
"10001 geoname_2_DE other [S1, GF, HD, GE, G1] 2015-07-02 08:08:31 \n",
|
|
"10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n",
|
|
"\n",
|
|
" note suggestions activity_low activity_medium activity_high \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN 0 0 0 \n",
|
|
"10 NaN NaN 0 0 0 \n",
|
|
"1000 NaN NaN NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" recordcount recordhistory \\\n",
|
|
"eprintid \n",
|
|
"1 25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n",
|
|
"10 100 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... \n",
|
|
"1000 NaN NaN \n",
|
|
"10001 NaN NaN \n",
|
|
"10008 NaN NaN \n",
|
|
"\n",
|
|
" fulltexts_total fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN NaN \n",
|
|
"10 NaN NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" registry_name registry_id submit_to submitted_to_name \\\n",
|
|
"eprintid \n",
|
|
"1 [celestial, opendoar] [669, 58] NaN NaN \n",
|
|
"10 [celestial, opendoar] [258, 526] NaN NaN \n",
|
|
"1000 NaN NaN NaN NaN \n",
|
|
"10001 [celestial, opendoar] [5881, 3408] NaN NaN \n",
|
|
"10008 celestial 5882 NaN NaN \n",
|
|
"\n",
|
|
" submitted_to_done webometrics_rank webometrics_size \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN \n",
|
|
"10 NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN \n",
|
|
"\n",
|
|
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN \n",
|
|
"10 NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN \n",
|
|
"\n",
|
|
" monthly_deposits total_deposits association \n",
|
|
"eprintid \n",
|
|
"1 NaN NaN NaN \n",
|
|
"10 NaN NaN NaN \n",
|
|
"1000 NaN NaN NaN \n",
|
|
"10001 NaN NaN NaN \n",
|
|
"10008 NaN NaN NaN "
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"def value_or_list(cell_set):\n",
|
|
"    \"\"\"Collapse one aggregated cell (a set of values) into a scalar or a list.\n",
|
|
"\n",
|
|
"    Parameters\n",
|
|
"    ----------\n",
|
|
"    cell_set : set\n",
|
|
"        Values gathered for a single cell. A value that is not a set\n",
|
|
"        (e.g. a cell already collapsed by a previous run) is returned as is.\n",
|
|
"\n",
|
|
"    Returns\n",
|
|
"    -------\n",
|
|
"    np.nan if no non-NaN value is left, the value itself if exactly one is\n",
|
|
"    left, otherwise a list of the remaining values in arbitrary order.\n",
|
|
"    \"\"\"\n",
|
|
"    # Re-running this cell would pass strings/NaN back in: set('abc') splits a\n",
|
|
"    # string into characters and set(nan) raises TypeError, so pass them through.\n",
|
|
"    if not isinstance(cell_set, (set, frozenset)):\n",
|
|
"        return cell_set\n",
|
|
"    # NaN != NaN, so discard(np.nan) only drops the np.nan object itself;\n",
|
|
"    # test self-inequality instead to drop every float NaN.\n",
|
|
"    values = [v for v in cell_set if not (isinstance(v, float) and v != v)]\n",
|
|
"    if not values:\n",
|
|
"        return np.nan\n",
|
|
"    if len(values) == 1:\n",
|
|
"        return values[0]\n",
|
|
"    return values\n",
|
|
" \n",
|
|
"# Unwrap every aggregated cell: NaN when empty, the bare value when unique, otherwise a list.\n",
|
|
"roar_df = roar_df.applymap(func=value_or_list)\n",
|
|
"roar_df.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>4259</th>\n",
|
|
" <td>606</td>\n",
|
|
" <td>657</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/06/06</td>\n",
|
|
" <td>2010-01-06 13:44:31</td>\n",
|
|
" <td>2016-04-17 21:53:14</td>\n",
|
|
" <td>2010-01-06 13:44:31</td>\n",
|
|
" <td>subject</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>http://hal.archives-ouvertes.fr/</td>\n",
|
|
" <td>HAL: Hyper Article en Ligne</td>\n",
|
|
" <td>http://hal.archives-ouvertes.fr/oai/oai.php</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>fr</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>hal</td>\n",
|
|
" <td>geoname_2_FR</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1998-11-02 11:53:57</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>675816</td>\n",
|
|
" <td>4,12,17,26,43,57,81,185,431,861,1184,1517,2442...</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>[celestial, opendoar, roarmap]</td>\n",
|
|
" <td>[69, 1106, 166]</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>3063</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" eprintid rev_number eprint_status userid importid source \\\n",
|
|
"4259 606 657 archive 1 NaN NaN \n",
|
|
"\n",
|
|
" dir datestamp lastmod \\\n",
|
|
"4259 disk0/00/00/06/06 2010-01-06 13:44:31 2016-04-17 21:53:14 \n",
|
|
"\n",
|
|
" status_changed type succeeds commentary metadata_visibility \\\n",
|
|
"4259 2010-01-06 13:44:31 subject NaN NaN show \n",
|
|
"\n",
|
|
" latitude longitude relation_type relation_uri item_issues_id \\\n",
|
|
"4259 NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_type item_issues_description item_issues_timestamp \\\n",
|
|
"4259 NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_status item_issues_reported_by item_issues_resolved_by \\\n",
|
|
"4259 NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_comment item_issues_count sword_depositor sword_slug \\\n",
|
|
"4259 NaN 0 NaN NaN \n",
|
|
"\n",
|
|
" exemplar home_page title \\\n",
|
|
"4259 NaN http://hal.archives-ouvertes.fr/ HAL: Hyper Article en Ligne \n",
|
|
"\n",
|
|
" oai_pmh sword_endpoint rss_feed \\\n",
|
|
"4259 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n",
|
|
"\n",
|
|
" twitter_feed description fulltext open_access mandate organisation_title \\\n",
|
|
"4259 NaN NaN TRUE TRUE NaN NaN \n",
|
|
"\n",
|
|
" organisation_home_page location_country location_city location_latitude \\\n",
|
|
"4259 NaN fr NaN NaN \n",
|
|
"\n",
|
|
" location_longitude software geoname version subjects \\\n",
|
|
"4259 NaN hal geoname_2_FR other NaN \n",
|
|
"\n",
|
|
" date note suggestions activity_low activity_medium \\\n",
|
|
"4259 1998-11-02 11:53:57 NaN NaN 0 0 \n",
|
|
"\n",
|
|
" activity_high recordcount \\\n",
|
|
"4259 2 675816 \n",
|
|
"\n",
|
|
" recordhistory fulltexts_total \\\n",
|
|
"4259 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n",
|
|
"\n",
|
|
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n",
|
|
"4259 NaN NaN NaN \n",
|
|
"\n",
|
|
" registry_name registry_id submit_to \\\n",
|
|
"4259 [celestial, opendoar, roarmap] [69, 1106, 166] NaN \n",
|
|
"\n",
|
|
" submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n",
|
|
"4259 NaN NaN 1 6 \n",
|
|
"\n",
|
|
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
|
|
"4259 1 1 1 \n",
|
|
"\n",
|
|
" monthly_deposits total_deposits \\\n",
|
|
"4259 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 3063 \n",
|
|
"\n",
|
|
" association \n",
|
|
"4259 NaN "
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Move eprintid from the index into a column. Guarded so that re-running this\n",
|
|
"# cell does not reset the default index and insert a spurious 'index' column.\n",
|
|
"if 'eprintid' not in roar_df.columns:\n",
|
|
"    roar_df = roar_df.reset_index()\n",
|
|
"roar_df[roar_df.eprintid == '606']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>eprintid</th>\n",
|
|
" <th>rev_number</th>\n",
|
|
" <th>eprint_status</th>\n",
|
|
" <th>userid</th>\n",
|
|
" <th>importid</th>\n",
|
|
" <th>source</th>\n",
|
|
" <th>dir</th>\n",
|
|
" <th>datestamp</th>\n",
|
|
" <th>lastmod</th>\n",
|
|
" <th>status_changed</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>succeeds</th>\n",
|
|
" <th>commentary</th>\n",
|
|
" <th>metadata_visibility</th>\n",
|
|
" <th>latitude</th>\n",
|
|
" <th>longitude</th>\n",
|
|
" <th>relation_type</th>\n",
|
|
" <th>relation_uri</th>\n",
|
|
" <th>item_issues_id</th>\n",
|
|
" <th>item_issues_type</th>\n",
|
|
" <th>item_issues_description</th>\n",
|
|
" <th>item_issues_timestamp</th>\n",
|
|
" <th>item_issues_status</th>\n",
|
|
" <th>item_issues_reported_by</th>\n",
|
|
" <th>item_issues_resolved_by</th>\n",
|
|
" <th>item_issues_comment</th>\n",
|
|
" <th>item_issues_count</th>\n",
|
|
" <th>sword_depositor</th>\n",
|
|
" <th>sword_slug</th>\n",
|
|
" <th>exemplar</th>\n",
|
|
" <th>home_page</th>\n",
|
|
" <th>title</th>\n",
|
|
" <th>oai_pmh</th>\n",
|
|
" <th>sword_endpoint</th>\n",
|
|
" <th>rss_feed</th>\n",
|
|
" <th>twitter_feed</th>\n",
|
|
" <th>description</th>\n",
|
|
" <th>fulltext</th>\n",
|
|
" <th>open_access</th>\n",
|
|
" <th>mandate</th>\n",
|
|
" <th>organisation_title</th>\n",
|
|
" <th>organisation_home_page</th>\n",
|
|
" <th>location_country</th>\n",
|
|
" <th>location_city</th>\n",
|
|
" <th>location_latitude</th>\n",
|
|
" <th>location_longitude</th>\n",
|
|
" <th>software</th>\n",
|
|
" <th>geoname</th>\n",
|
|
" <th>version</th>\n",
|
|
" <th>subjects</th>\n",
|
|
" <th>date</th>\n",
|
|
" <th>note</th>\n",
|
|
" <th>suggestions</th>\n",
|
|
" <th>activity_low</th>\n",
|
|
" <th>activity_medium</th>\n",
|
|
" <th>activity_high</th>\n",
|
|
" <th>recordcount</th>\n",
|
|
" <th>recordhistory</th>\n",
|
|
" <th>fulltexts_total</th>\n",
|
|
" <th>fulltexts_docs</th>\n",
|
|
" <th>fulltexts_rtotal</th>\n",
|
|
" <th>fulltexts_rdocs</th>\n",
|
|
" <th>registry_name</th>\n",
|
|
" <th>registry_id</th>\n",
|
|
" <th>submit_to</th>\n",
|
|
" <th>submitted_to_name</th>\n",
|
|
" <th>submitted_to_done</th>\n",
|
|
" <th>webometrics_rank</th>\n",
|
|
" <th>webometrics_size</th>\n",
|
|
" <th>webometrics_visibility</th>\n",
|
|
" <th>webometrics_rich_files</th>\n",
|
|
" <th>webometrics_scholar</th>\n",
|
|
" <th>monthly_deposits</th>\n",
|
|
" <th>total_deposits</th>\n",
|
|
" <th>association</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>count</th>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>63</td>\n",
|
|
" <td>63</td>\n",
|
|
" <td>63</td>\n",
|
|
" <td>63</td>\n",
|
|
" <td>63</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>2242</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>268</td>\n",
|
|
" <td>5437</td>\n",
|
|
" <td>5442</td>\n",
|
|
" <td>4332</td>\n",
|
|
" <td>178</td>\n",
|
|
" <td>1538</td>\n",
|
|
" <td>116</td>\n",
|
|
" <td>3837</td>\n",
|
|
" <td>4197</td>\n",
|
|
" <td>4197</td>\n",
|
|
" <td>3746</td>\n",
|
|
" <td>4460</td>\n",
|
|
" <td>4286</td>\n",
|
|
" <td>5138</td>\n",
|
|
" <td>3714</td>\n",
|
|
" <td>3725</td>\n",
|
|
" <td>3708</td>\n",
|
|
" <td>4700</td>\n",
|
|
" <td>4730</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>1289</td>\n",
|
|
" <td>5429</td>\n",
|
|
" <td>218</td>\n",
|
|
" <td>189</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>2290</td>\n",
|
|
" <td>2288</td>\n",
|
|
" <td>270</td>\n",
|
|
" <td>258</td>\n",
|
|
" <td>270</td>\n",
|
|
" <td>258</td>\n",
|
|
" <td>4605</td>\n",
|
|
" <td>4580</td>\n",
|
|
" <td>375</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>756</td>\n",
|
|
" <td>756</td>\n",
|
|
" <td>223</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>unique</th>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>660</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>2189</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>4198</td>\n",
|
|
" <td>4043</td>\n",
|
|
" <td>4230</td>\n",
|
|
" <td>12</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>48</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>62</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>5271</td>\n",
|
|
" <td>5143</td>\n",
|
|
" <td>4059</td>\n",
|
|
" <td>172</td>\n",
|
|
" <td>1485</td>\n",
|
|
" <td>112</td>\n",
|
|
" <td>3359</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>3858</td>\n",
|
|
" <td>3831</td>\n",
|
|
" <td>144</td>\n",
|
|
" <td>1884</td>\n",
|
|
" <td>2923</td>\n",
|
|
" <td>2953</td>\n",
|
|
" <td>31</td>\n",
|
|
" <td>126</td>\n",
|
|
" <td>53</td>\n",
|
|
" <td>938</td>\n",
|
|
" <td>4898</td>\n",
|
|
" <td>210</td>\n",
|
|
" <td>173</td>\n",
|
|
" <td>72</td>\n",
|
|
" <td>54</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>741</td>\n",
|
|
" <td>1702</td>\n",
|
|
" <td>135</td>\n",
|
|
" <td>118</td>\n",
|
|
" <td>134</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>4259</td>\n",
|
|
" <td>7</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>146</td>\n",
|
|
" <td>143</td>\n",
|
|
" <td>346</td>\n",
|
|
" <td>342</td>\n",
|
|
" <td>3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>top</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>11</td>\n",
|
|
" <td>archive</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>disk0/00/00/00/01</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>2011-07-06 08:24:53</td>\n",
|
|
" <td>2010-01-06 13:43:48</td>\n",
|
|
" <td>institutional</td>\n",
|
|
" <td>10164</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>show</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>bad_oai_pmh_url_0</td>\n",
|
|
" <td>duplicate_title</td>\n",
|
|
" <td>Duplicate title to <xhtml:table xmlns:xhtml=\"h...</td>\n",
|
|
" <td>2010-01-13 10:44:49</td>\n",
|
|
" <td>discovered</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>http://eprints.upnjatim.ac.id/</td>\n",
|
|
" <td>Repositorio Institucional</td>\n",
|
|
" <td>http://kce.docressources.info/ws/PMBWs_2</td>\n",
|
|
" <td>http://producao.usp.br/sword/servicedocument</td>\n",
|
|
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
|
|
" <td>http://my.indexcopernicus.com/fredemoreno</td>\n",
|
|
" <td>info:other:archives.eprints.org:import</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>TRUE</td>\n",
|
|
" <td>FALSE</td>\n",
|
|
" <td>Chinese Academy of Science (中国科学院)</td>\n",
|
|
" <td>http://www.cas.cn/</td>\n",
|
|
" <td>us</td>\n",
|
|
" <td>Lima</td>\n",
|
|
" <td>34.1607</td>\n",
|
|
" <td>-118.139</td>\n",
|
|
" <td>dspace</td>\n",
|
|
" <td>geoname_2_US</td>\n",
|
|
" <td>other</td>\n",
|
|
" <td>K1</td>\n",
|
|
" <td>2006-05-04 10:48:14</td>\n",
|
|
" <td>¿Quién puede depositar documentos en el reposi...</td>\n",
|
|
" <td>This repository is hosted by the Texas Digital...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>[celestial, opendoar]</td>\n",
|
|
" <td>[1879, 2246]</td>\n",
|
|
" <td>[celestial, opendoar, roarmap]</td>\n",
|
|
" <td>opendoar</td>\n",
|
|
" <td>2021-01-25</td>\n",
|
|
" <td>24</td>\n",
|
|
" <td>46</td>\n",
|
|
" <td>20</td>\n",
|
|
" <td>824</td>\n",
|
|
" <td>806</td>\n",
|
|
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
|
|
" <td>0</td>\n",
|
|
" <td>russell_group</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>freq</th>\n",
|
|
" <td>1</td>\n",
|
|
" <td>333</td>\n",
|
|
" <td>5444</td>\n",
|
|
" <td>1330</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>8</td>\n",
|
|
" <td>16</td>\n",
|
|
" <td>3853</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>5402</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>15</td>\n",
|
|
" <td>33</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>45</td>\n",
|
|
" <td>38</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>2201</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>261</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>7</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>112</td>\n",
|
|
" <td>2805</td>\n",
|
|
" <td>2696</td>\n",
|
|
" <td>2748</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>891</td>\n",
|
|
" <td>74</td>\n",
|
|
" <td>25</td>\n",
|
|
" <td>25</td>\n",
|
|
" <td>2341</td>\n",
|
|
" <td>845</td>\n",
|
|
" <td>4841</td>\n",
|
|
" <td>53</td>\n",
|
|
" <td>99</td>\n",
|
|
" <td>2</td>\n",
|
|
" <td>9</td>\n",
|
|
" <td>2012</td>\n",
|
|
" <td>2074</td>\n",
|
|
" <td>2210</td>\n",
|
|
" <td>730</td>\n",
|
|
" <td>95</td>\n",
|
|
" <td>113</td>\n",
|
|
" <td>114</td>\n",
|
|
" <td>113</td>\n",
|
|
" <td>114</td>\n",
|
|
" <td>2106</td>\n",
|
|
" <td>4</td>\n",
|
|
" <td>119</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>205</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>1</td>\n",
|
|
" <td>3</td>\n",
|
|
" <td>5</td>\n",
|
|
" <td>387</td>\n",
|
|
" <td>387</td>\n",
|
|
" <td>130</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>mean</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>std</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>min</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25%</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>50%</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>75%</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>max</th>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" eprintid rev_number eprint_status userid importid source \\\n",
|
|
"count 5444 5444 5444 5444 0.0 0.0 \n",
|
|
"unique 5444 660 1 2189 NaN NaN \n",
|
|
"top 1 11 archive 1 NaN NaN \n",
|
|
"freq 1 333 5444 1330 NaN NaN \n",
|
|
"mean NaN NaN NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" dir datestamp lastmod \\\n",
|
|
"count 5444 5444 5444 \n",
|
|
"unique 5444 4198 4043 \n",
|
|
"top disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-06 08:24:53 \n",
|
|
"freq 1 16 8 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" status_changed type succeeds commentary \\\n",
|
|
"count 5444 5444 108 0.0 \n",
|
|
"unique 4230 12 108 NaN \n",
|
|
"top 2010-01-06 13:43:48 institutional 10164 NaN \n",
|
|
"freq 16 3853 1 NaN \n",
|
|
"mean NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
|
|
"count 5444 0.0 0.0 0.0 0.0 \n",
|
|
"unique 2 NaN NaN NaN NaN \n",
|
|
"top show NaN NaN NaN NaN \n",
|
|
"freq 5402 NaN NaN NaN NaN \n",
|
|
"mean NaN NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_id item_issues_type \\\n",
|
|
"count 63 63 \n",
|
|
"unique 48 5 \n",
|
|
"top bad_oai_pmh_url_0 duplicate_title \n",
|
|
"freq 15 33 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" item_issues_description \\\n",
|
|
"count 63 \n",
|
|
"unique 62 \n",
|
|
"top Duplicate title to <xhtml:table xmlns:xhtml=\"h... \n",
|
|
"freq 2 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
|
|
"count 63 63 0.0 \n",
|
|
"unique 4 3 NaN \n",
|
|
"top 2010-01-13 10:44:49 discovered NaN \n",
|
|
"freq 45 38 NaN \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
|
|
"count 0.0 0.0 2242 \n",
|
|
"unique NaN NaN 4 \n",
|
|
"top NaN NaN 0 \n",
|
|
"freq NaN NaN 2201 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" sword_depositor sword_slug exemplar home_page \\\n",
|
|
"count 0.0 0.0 268 5437 \n",
|
|
"unique NaN NaN 2 5271 \n",
|
|
"top NaN NaN FALSE http://eprints.upnjatim.ac.id/ \n",
|
|
"freq NaN NaN 261 4 \n",
|
|
"mean NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" title oai_pmh \\\n",
|
|
"count 5442 4332 \n",
|
|
"unique 5143 4059 \n",
|
|
"top Repositorio Institucional http://kce.docressources.info/ws/PMBWs_2 \n",
|
|
"freq 7 4 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" sword_endpoint \\\n",
|
|
"count 178 \n",
|
|
"unique 172 \n",
|
|
"top http://producao.usp.br/sword/servicedocument \n",
|
|
"freq 2 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" rss_feed \\\n",
|
|
"count 1538 \n",
|
|
"unique 1485 \n",
|
|
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
|
|
"freq 5 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" twitter_feed \\\n",
|
|
"count 116 \n",
|
|
"unique 112 \n",
|
|
"top http://my.indexcopernicus.com/fredemoreno \n",
|
|
"freq 2 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" description fulltext open_access mandate \\\n",
|
|
"count 3837 4197 4197 3746 \n",
|
|
"unique 3359 2 2 2 \n",
|
|
"top info:other:archives.eprints.org:import TRUE TRUE FALSE \n",
|
|
"freq 112 2805 2696 2748 \n",
|
|
"mean NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" organisation_title organisation_home_page \\\n",
|
|
"count 4460 4286 \n",
|
|
"unique 3858 3831 \n",
|
|
"top Chinese Academy of Science (中国科学院) http://www.cas.cn/ \n",
|
|
"freq 9 9 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" location_country location_city location_latitude location_longitude \\\n",
|
|
"count 5138 3714 3725 3708 \n",
|
|
"unique 144 1884 2923 2953 \n",
|
|
"top us Lima 34.1607 -118.139 \n",
|
|
"freq 891 74 25 25 \n",
|
|
"mean NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" software geoname version subjects date \\\n",
|
|
"count 4700 4730 5444 1289 5429 \n",
|
|
"unique 31 126 53 938 4898 \n",
|
|
"top dspace geoname_2_US other K1 2006-05-04 10:48:14 \n",
|
|
"freq 2341 845 4841 53 99 \n",
|
|
"mean NaN NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" note \\\n",
|
|
"count 218 \n",
|
|
"unique 210 \n",
|
|
"top ¿Quién puede depositar documentos en el reposi... \n",
|
|
"freq 2 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN \n",
|
|
"\n",
|
|
" suggestions activity_low \\\n",
|
|
"count 189 2288 \n",
|
|
"unique 173 72 \n",
|
|
"top This repository is hosted by the Texas Digital... 0 \n",
|
|
"freq 9 2012 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" activity_medium activity_high recordcount \\\n",
|
|
"count 2288 2288 2290 \n",
|
|
"unique 54 16 741 \n",
|
|
"top 0 0 100 \n",
|
|
"freq 2074 2210 730 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" recordhistory fulltexts_total \\\n",
|
|
"count 2288 270 \n",
|
|
"unique 1702 135 \n",
|
|
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
|
|
"freq 95 113 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
|
|
"count 258 270 258 4605 \n",
|
|
"unique 118 134 117 8 \n",
|
|
"top 0 0 0 [celestial, opendoar] \n",
|
|
"freq 114 113 114 2106 \n",
|
|
"mean NaN NaN NaN NaN \n",
|
|
"std NaN NaN NaN NaN \n",
|
|
"min NaN NaN NaN NaN \n",
|
|
"25% NaN NaN NaN NaN \n",
|
|
"50% NaN NaN NaN NaN \n",
|
|
"75% NaN NaN NaN NaN \n",
|
|
"max NaN NaN NaN NaN \n",
|
|
"\n",
|
|
" registry_id submit_to submitted_to_name \\\n",
|
|
"count 4580 375 205 \n",
|
|
"unique 4259 7 1 \n",
|
|
"top [1879, 2246] [celestial, opendoar, roarmap] opendoar \n",
|
|
"freq 4 119 205 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" submitted_to_done webometrics_rank webometrics_size \\\n",
|
|
"count 205 148 148 \n",
|
|
"unique 1 148 148 \n",
|
|
"top 2021-01-25 24 46 \n",
|
|
"freq 205 1 1 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
|
|
"count 148 148 148 \n",
|
|
"unique 148 146 143 \n",
|
|
"top 20 824 806 \n",
|
|
"freq 1 3 5 \n",
|
|
"mean NaN NaN NaN \n",
|
|
"std NaN NaN NaN \n",
|
|
"min NaN NaN NaN \n",
|
|
"25% NaN NaN NaN \n",
|
|
"50% NaN NaN NaN \n",
|
|
"75% NaN NaN NaN \n",
|
|
"max NaN NaN NaN \n",
|
|
"\n",
|
|
" monthly_deposits total_deposits \\\n",
|
|
"count 756 756 \n",
|
|
"unique 346 342 \n",
|
|
"top 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 0 \n",
|
|
"freq 387 387 \n",
|
|
"mean NaN NaN \n",
|
|
"std NaN NaN \n",
|
|
"min NaN NaN \n",
|
|
"25% NaN NaN \n",
|
|
"50% NaN NaN \n",
|
|
"75% NaN NaN \n",
|
|
"max NaN NaN \n",
|
|
"\n",
|
|
" association \n",
|
|
"count 223 \n",
|
|
"unique 3 \n",
|
|
"top russell_group \n",
|
|
"freq 130 \n",
|
|
"mean NaN \n",
|
|
"std NaN \n",
|
|
"min NaN \n",
|
|
"25% NaN \n",
|
|
"50% NaN \n",
|
|
"75% NaN \n",
|
|
"max NaN "
|
|
]
|
|
},
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Summary statistics for every column, regardless of dtype.\n",
"roar_df.describe(include=\"all\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"eprintid 0\n",
|
|
"rev_number 0\n",
|
|
"eprint_status 0\n",
|
|
"userid 0\n",
|
|
"importid 5444\n",
|
|
"source 5444\n",
|
|
"dir 0\n",
|
|
"datestamp 0\n",
|
|
"lastmod 0\n",
|
|
"status_changed 0\n",
|
|
"type 0\n",
|
|
"succeeds 5336\n",
|
|
"commentary 5444\n",
|
|
"metadata_visibility 0\n",
|
|
"latitude 5444\n",
|
|
"longitude 5444\n",
|
|
"relation_type 5444\n",
|
|
"relation_uri 5444\n",
|
|
"item_issues_id 5381\n",
|
|
"item_issues_type 5381\n",
|
|
"item_issues_description 5381\n",
|
|
"item_issues_timestamp 5381\n",
|
|
"item_issues_status 5381\n",
|
|
"item_issues_reported_by 5444\n",
|
|
"item_issues_resolved_by 5444\n",
|
|
"item_issues_comment 5444\n",
|
|
"item_issues_count 3202\n",
|
|
"sword_depositor 5444\n",
|
|
"sword_slug 5444\n",
|
|
"exemplar 5176\n",
|
|
"home_page 7\n",
|
|
"title 2\n",
|
|
"oai_pmh 1112\n",
|
|
"sword_endpoint 5266\n",
|
|
"rss_feed 3906\n",
|
|
"twitter_feed 5328\n",
|
|
"description 1607\n",
|
|
"fulltext 1247\n",
|
|
"open_access 1247\n",
|
|
"mandate 1698\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Missing-value count per column; .iloc makes the positional slice (first 40 columns) explicit.\n",
"roar_df.isna().sum().iloc[:40]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"organisation_title 984\n",
|
|
"organisation_home_page 1158\n",
|
|
"location_country 306\n",
|
|
"location_city 1730\n",
|
|
"location_latitude 1719\n",
|
|
"location_longitude 1736\n",
|
|
"software 744\n",
|
|
"geoname 714\n",
|
|
"version 0\n",
|
|
"subjects 4155\n",
|
|
"date 15\n",
|
|
"note 5226\n",
|
|
"suggestions 5255\n",
|
|
"activity_low 3156\n",
|
|
"activity_medium 3156\n",
|
|
"activity_high 3156\n",
|
|
"recordcount 3154\n",
|
|
"recordhistory 3156\n",
|
|
"fulltexts_total 5174\n",
|
|
"fulltexts_docs 5186\n",
|
|
"fulltexts_rtotal 5174\n",
|
|
"fulltexts_rdocs 5186\n",
|
|
"registry_name 839\n",
|
|
"registry_id 864\n",
|
|
"submit_to 5069\n",
|
|
"submitted_to_name 5239\n",
|
|
"submitted_to_done 5239\n",
|
|
"webometrics_rank 5296\n",
|
|
"webometrics_size 5296\n",
|
|
"webometrics_visibility 5296\n",
|
|
"webometrics_rich_files 5296\n",
|
|
"webometrics_scholar 5296\n",
|
|
"monthly_deposits 4688\n",
|
|
"total_deposits 4688\n",
|
|
"association 5221\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Missing-value count per column; .iloc makes the positional slice (column 40 onwards) explicit.\n",
"roar_df.isna().sum().iloc[40:]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"type\n",
|
|
"database 74\n",
|
|
"demonstration 20\n",
|
|
"institutional 3853\n",
|
|
"journal 125\n",
|
|
"learning 77\n",
|
|
"multi 143\n",
|
|
"opendata 41\n",
|
|
"other 410\n",
|
|
"researchdata 55\n",
|
|
"subject 295\n",
|
|
"theses 349\n",
|
|
"webobservatory 2\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Number of rows per value of the 'type' column (rows with a missing key are excluded by groupby).\n",
"roar_df.groupby('type').size()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"open_access\n",
|
|
"FALSE 1501\n",
|
|
"TRUE 2696\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Number of rows per value of the 'open_access' column (rows with a missing key are excluded by groupby).\n",
"roar_df.groupby('open_access').size()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"mandate\n",
|
|
"FALSE 2748\n",
|
|
"TRUE 998\n",
|
|
"dtype: int64"
|
|
]
|
|
},
|
|
"execution_count": 14,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Number of rows per value of the 'mandate' column (rows with a missing key are excluded by groupby).\n",
"roar_df.groupby('mandate').size()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "ccdc3acc266150d74575e7f25ef162b022ec22dae7e3244cf5a4f2ecbaf21c19"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3.9.12 ('data-science')",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.9.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|