{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
0921517archive1NaNNaNdisk0/00/00/09/212010-01-06 13:44:552016-04-17 21:54:112010-01-06 13:44:55thesesNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNFALSEhttp://alcme.oclc.org/ndltd/index.htmlNetworked Digital Library of Theses and Disser...http://alcme.oclc.org/ndltd/servlet/OAIHandlerNaNNaNNaNNaNTRUETRUENaNNaNNaNusNaNNaNNaNetddbgeoname_2_USotherNaN2006-04-18NaNNaN00015334000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5...NaNNaNNaNNaNcelestial5NaNNaNNaNNaNNaNNaNNaNNaN0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0NaN
11489556archive1NaNNaNdisk0/00/00/14/892010-01-06 13:46:052016-04-17 21:49:002010-01-06 13:46:05otherNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://prensahistorica.mcu.es/prensahistorica/...Virtual Library of Historical Presshttp://prensahistorica.mcu.es/prensahistorica/...NaNNaNNaNThe Virtual Library of Historical Press is the...TRUETRUENaNNaNNaNesNaNNaNNaNothergeoname_2_ESotherNaN2006-04-04 00:00:00NaNNaN00010084860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761...NaNNaNNaNNaNcelestial858NaNNaNNaNNaNNaNNaNNaNNaN0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...47500NaN
2606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaNopendoar166NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
3606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial1106NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNroarmap69NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source dir \\\n", "0 921 517 archive 1 NaN NaN disk0/00/00/09/21 \n", "1 1489 556 archive 1 NaN NaN disk0/00/00/14/89 \n", "2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n", "3 606 NaN NaN NaN NaN NaN NaN \n", "4 606 NaN NaN NaN NaN NaN NaN \n", "\n", " datestamp lastmod status_changed type \\\n", "0 2010-01-06 13:44:55 2016-04-17 21:54:11 2010-01-06 13:44:55 theses \n", "1 2010-01-06 13:46:05 2016-04-17 21:49:00 2010-01-06 13:46:05 other \n", "2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " succeeds commentary metadata_visibility latitude longitude relation_type \\\n", "0 NaN NaN show NaN NaN NaN \n", "1 NaN NaN show NaN NaN NaN \n", "2 NaN NaN show NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " relation_uri item_issues_id item_issues_type item_issues_description \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "0 NaN NaN 0 \n", "1 NaN NaN 0 \n", "2 NaN NaN 0 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar \\\n", "0 NaN NaN FALSE \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " home_page \\\n", "0 http://alcme.oclc.org/ndltd/index.html \n", "1 http://prensahistorica.mcu.es/prensahistorica/... \n", "2 http://hal.archives-ouvertes.fr/ \n", "3 NaN \n", "4 NaN \n", "\n", " title \\\n", "0 Networked Digital Library of Theses and Disser... 
\n", "1 Virtual Library of Historical Press \n", "2 HAL: Hyper Article en Ligne \n", "3 NaN \n", "4 NaN \n", "\n", " oai_pmh sword_endpoint rss_feed \\\n", "0 http://alcme.oclc.org/ndltd/servlet/OAIHandler NaN NaN \n", "1 http://prensahistorica.mcu.es/prensahistorica/... NaN NaN \n", "2 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " twitter_feed description fulltext \\\n", "0 NaN NaN TRUE \n", "1 NaN The Virtual Library of Historical Press is the... TRUE \n", "2 NaN NaN TRUE \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " open_access mandate organisation_title organisation_home_page \\\n", "0 TRUE NaN NaN NaN \n", "1 TRUE NaN NaN NaN \n", "2 TRUE NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " location_country location_city location_latitude location_longitude \\\n", "0 us NaN NaN NaN \n", "1 es NaN NaN NaN \n", "2 fr NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " software geoname version subjects date note \\\n", "0 etddb geoname_2_US other NaN 2006-04-18 NaN \n", "1 other geoname_2_ES other NaN 2006-04-04 00:00:00 NaN \n", "2 hal geoname_2_FR other NaN 1998-11-02 11:53:57 NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " suggestions activity_low activity_medium activity_high recordcount \\\n", "0 NaN 0 0 0 1533400 \n", "1 NaN 0 0 0 1008486 \n", "2 NaN 0 0 2 675816 \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5... NaN \n", "1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761... NaN \n", "2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... 
NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n", "0 NaN NaN NaN celestial 5 \n", "1 NaN NaN NaN celestial 858 \n", "2 NaN NaN NaN opendoar 166 \n", "3 NaN NaN NaN celestial 1106 \n", "4 NaN NaN NaN roarmap 69 \n", "\n", " submit_to submitted_to_name submitted_to_done webometrics_rank \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN 1 \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " webometrics_size webometrics_visibility webometrics_rich_files \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 6 1 1 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " webometrics_scholar monthly_deposits \\\n", "0 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "1 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " total_deposits association \n", "0 0 NaN \n", "1 47500 NaN \n", "2 3063 NaN \n", "3 NaN NaN \n", "4 NaN NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n", "roar_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['eprintid', 'rev_number', 'eprint_status', 'userid', 'importid',\n", " 'source', 'dir', 'datestamp', 'lastmod', 'status_changed', 'type',\n", " 'succeeds', 'commentary', 'metadata_visibility', 'latitude',\n", " 'longitude', 'relation_type', 'relation_uri', 'item_issues_id',\n", " 'item_issues_type', 'item_issues_description', 'item_issues_timestamp',\n", " 'item_issues_status', 'item_issues_reported_by',\n", " 'item_issues_resolved_by', 'item_issues_comment', 'item_issues_count',\n", " 'sword_depositor', 'sword_slug', 'exemplar', 'home_page', 'title',\n", " 'oai_pmh', 'sword_endpoint', 'rss_feed', 'twitter_feed', 'description',\n", " 
'fulltext', 'open_access', 'mandate', 'organisation_title',\n", " 'organisation_home_page', 'location_country', 'location_city',\n", " 'location_latitude', 'location_longitude', 'software', 'geoname',\n", " 'version', 'subjects', 'date', 'note', 'suggestions', 'activity_low',\n", " 'activity_medium', 'activity_high', 'recordcount', 'recordhistory',\n", " 'fulltexts_total', 'fulltexts_docs', 'fulltexts_rtotal',\n", " 'fulltexts_rdocs', 'registry_name', 'registry_id', 'submit_to',\n", " 'submitted_to_name', 'submitted_to_done', 'webometrics_rank',\n", " 'webometrics_size', 'webometrics_visibility', 'webometrics_rich_files',\n", " 'webometrics_scholar', 'monthly_deposits', 'total_deposits',\n", " 'association'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
count1807954445444544400544454445444544454441080544400008686868686000224200268543754424567178153811638374197419737465253496552683839379837804700473054441094154292181892288228822882290228827025827025873937258730205205148148148148148756756237
unique54446601218900544441984043423012108020000513683200040025271514342781721485112335922244984395136189629653003311265323748982101737254167411702135118134117351653111481481481461433463422
top1211811archive1NaNNaNdisk0/00/00/09/212010-01-06 13:43:482016-04-17 21:55:192010-01-06 13:43:48institutional3164NaNshowNaNNaNNaNNaNbad_oai_pmh_url_0duplicate_titleSimilar title to <xhtml:table xmlns:xhtml=\"htt...2010-01-13 10:44:49discoveredNaNNaNNaN0NaNNaNFALSEhttp://eprints.upnjatim.ac.id/Repositorio Institucionalhttp://virtuelcampus.univ-msila.dz/fllhttp://producao.usp.br/sword/servicedocumenthttp://eprints.upnjatim.ac.id/cgi/latest_tool?...http://twitter.com/bu_ufscinfo:other:archives.eprints.org:importTRUETRUEFALSEChinese Academy of Science (中国科学院)http://www.cas.cn/usLima34.1607-118.139dspacegeoname_2_USotherL12006-05-04 10:48:14¿Quién puede depositar documentos en el reposi...This repository is hosted by the Texas Digital...0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0000celestial184celestialopendoar2021-01-251618248060,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0russell_group
freq23833354441330NaNNaN11681638531NaN5402NaNNaNNaNNaN184736853NaNNaNNaN2201NaNNaN26147525211228052696274899907762525234184548413629929201220742210730951131141131143741728320520511135387387144
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "count 18079 5444 5444 5444 0 0 \n", "unique 5444 660 1 2189 0 0 \n", "top 12118 11 archive 1 NaN NaN \n", "freq 238 333 5444 1330 NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "count 5444 5444 5444 \n", "unique 5444 4198 4043 \n", "top disk0/00/00/09/21 2010-01-06 13:43:48 2016-04-17 21:55:19 \n", "freq 1 16 8 \n", "\n", " status_changed type succeeds commentary \\\n", "count 5444 5444 108 0 \n", "unique 4230 12 108 0 \n", "top 2010-01-06 13:43:48 institutional 3164 NaN \n", "freq 16 3853 1 NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "count 5444 0 0 0 0 \n", "unique 2 0 0 0 0 \n", "top show NaN NaN NaN NaN \n", "freq 5402 NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type \\\n", "count 86 86 \n", "unique 51 3 \n", "top bad_oai_pmh_url_0 duplicate_title \n", "freq 18 47 \n", "\n", " item_issues_description \\\n", "count 86 \n", "unique 68 \n", "top Similar title to \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
2606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaNopendoar166NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
3606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial1106NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNroarmap69NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source dir \\\n", "2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n", "3 606 NaN NaN NaN NaN NaN NaN \n", "4 606 NaN NaN NaN NaN NaN NaN \n", "\n", " datestamp lastmod status_changed type \\\n", "2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " succeeds commentary metadata_visibility latitude longitude relation_type \\\n", "2 NaN NaN show NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " relation_uri item_issues_id item_issues_type item_issues_description \\\n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "2 NaN NaN 0 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar home_page \\\n", "2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " title oai_pmh \\\n", "2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n", "2 NaN NaN NaN NaN TRUE TRUE \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " mandate organisation_title organisation_home_page location_country \\\n", "2 NaN NaN NaN fr \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " location_city location_latitude location_longitude software geoname \\\n", "2 NaN NaN NaN hal geoname_2_FR \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " version subjects date note suggestions activity_low \\\n", "2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN 
NaN \n", "\n", " activity_medium activity_high recordcount \\\n", "2 0 2 675816 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n", "2 NaN NaN NaN opendoar 166 \n", "3 NaN NaN NaN celestial 1106 \n", "4 NaN NaN NaN roarmap 69 \n", "\n", " submit_to submitted_to_name submitted_to_done webometrics_rank \\\n", "2 NaN NaN NaN 1 \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " webometrics_size webometrics_visibility webometrics_rich_files \\\n", "2 6 1 1 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " webometrics_scholar monthly_deposits \\\n", "2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " total_deposits association \n", "2 3063 NaN \n", "3 NaN NaN \n", "4 NaN NaN " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df[roar_df.eprintid == '606']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
eprintid
1{nan, 633}{nan, archive}{nan, 1}{nan}{nan}{nan, disk0/00/00/00/01}{nan, 2010-01-06 13:43:48}{nan, 2011-07-18 05:40:07}{nan, 2010-01-06 13:43:48}{subject, nan}{nan}{nan}{nan, show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, 0}{nan}{nan}{nan}{nan, http://archivesic.ccsd.cnrs.fr/}{nan, @RCHIVESIC }{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, fr}{nan}{nan}{nan}{nan, hal}{nan, geoname_2_FR}{nan, other}{nan}{nan, 2002-05-17 19:24:41}{nan}{nan}{nan, 0}{nan, 0}{nan, 0}{nan, 25}{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...{nan}{nan}{nan}{nan}{celestial, opendoar}{669, 58}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
10{nan, 511}{nan, archive}{nan, 1}{nan}{nan}{nan, disk0/00/00/00/10}{nan, 2010-01-06 13:43:48}{nan, 2011-07-18 05:40:13}{nan, 2010-01-06 13:43:48}{nan, institutional}{nan}{nan}{nan, show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, 0}{nan}{nan}{nan}{nan, http://www.diva-portal.org/mdh/}{nan, Academic Archive On-line (Mälardalen Uni...{http://www.diva-portal.org/oai/mdh/OAI, nan}{nan}{nan}{nan}{nan}{nan, TRUE}{nan, TRUE}{nan}{nan}{nan}{se, nan}{nan, Uppsala}{nan, 59.8667}{nan, 17.6333}{nan, diva}{nan, geoname_2_SE}{nan, other}{nan}{nan, 2005-12-08 13:15:22}{nan}{nan}{nan, 0}{nan, 0}{nan, 0}{nan, 100}{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1...{nan}{nan}{nan}{nan}{celestial, opendoar}{526, 258}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
1000{274}{archive}{1}{nan}{nan}{disk0/00/00/10/00}{2010-01-06 13:45:01}{2011-07-06 08:21:21}{2010-01-06 13:45:01}{subject}{nan}{nan}{show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{0}{nan}{nan}{nan}{http://pam.pisharp.org/}{PAM - Portuguese Archive of Mathematics}{nan}{nan}{nan}{nan}{nan}{TRUE}{TRUE}{nan}{nan}{nan}{pt}{Bellevue, WA}{47.6034}{-122.155}{dspace}{geoname_2_PT}{other}{nan}{2006-05-04 10:48:14}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
10001{nan, 20}{nan, archive}{nan, 91}{nan}{nan}{nan, disk0/00/01/00/01}{2015-08-08 14:52:11, nan}{nan, 2016-03-21 19:44:01}{2015-08-08 14:52:11, nan}{subject, nan}{nan}{nan}{nan, show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, http://edoc.sub.uni-hamburg.de/klimawand...{nan, Klimawandel Dokumentenserver}{nan, http://edoc.sub.uni-hamburg.de/klimawand...{nan}{nan}{nan}{The \"Documentenserver Klimawandel\" (Repositor...{nan, TRUE}{nan, TRUE}{nan, TRUE}{KLIMZUG projects, nan, Climate Service Center...{http://www.climateservicecenter.de/, http://w...{nan, de}{nan, Hamburg}{nan, 53.5511}{nan, 9.9937}{nan, opus}{nan, geoname_2_DE}{nan, other}{GF, GE, G1, HD, S1}{nan, 2015-07-02 08:08:31}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, celestial, opendoar}{nan, 5881, 3408}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
10008{11}{archive}{404}{nan}{nan}{disk0/00/01/00/08}{2015-08-08 14:52:26}{2016-03-21 19:43:51}{2015-08-08 14:52:26}{institutional}{nan}{nan}{show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{http://creativematter.skidmore.edu/}{Creative Matter | Skidmore College Research}{http://creativematter.skidmore.edu/do/oai/}{nan}{http://creativematter.skidmore.edu/recent.rss}{nan}{Welcome to Creative Matter, a repository for ...{TRUE}{FALSE}{FALSE}{Skidmore College}{http://www.skidmore.edu/}{us}{Saratoga Springs}{43.0961}{-73.7818}{bepress}{geoname_2_US}{other}{nan}{2015-07-06 17:35:50}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{celestial}{5882}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
\n", "
" ], "text/plain": [ " rev_number eprint_status userid importid source \\\n", "eprintid \n", "1 {nan, 633} {nan, archive} {nan, 1} {nan} {nan} \n", "10 {nan, 511} {nan, archive} {nan, 1} {nan} {nan} \n", "1000 {274} {archive} {1} {nan} {nan} \n", "10001 {nan, 20} {nan, archive} {nan, 91} {nan} {nan} \n", "10008 {11} {archive} {404} {nan} {nan} \n", "\n", " dir datestamp \\\n", "eprintid \n", "1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n", "10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n", "1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n", "10001 {nan, disk0/00/01/00/01} {2015-08-08 14:52:11, nan} \n", "10008 {disk0/00/01/00/08} {2015-08-08 14:52:26} \n", "\n", " lastmod status_changed \\\n", "eprintid \n", "1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n", "10 {nan, 2011-07-18 05:40:13} {nan, 2010-01-06 13:43:48} \n", "1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n", "10001 {nan, 2016-03-21 19:44:01} {2015-08-08 14:52:11, nan} \n", "10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n", "\n", " type succeeds commentary metadata_visibility \\\n", "eprintid \n", "1 {subject, nan} {nan} {nan} {nan, show} \n", "10 {nan, institutional} {nan} {nan} {nan, show} \n", "1000 {subject} {nan} {nan} {show} \n", "10001 {subject, nan} {nan} {nan} {nan, show} \n", "10008 {institutional} {nan} {nan} {show} \n", "\n", " latitude longitude relation_type relation_uri item_issues_id \\\n", "eprintid \n", "1 {nan} {nan} {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} {nan} \n", "\n", " item_issues_type item_issues_description item_issues_timestamp \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " item_issues_status item_issues_reported_by item_issues_resolved_by \\\n", "eprintid \n", "1 {nan} {nan} 
{nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " item_issues_comment item_issues_count sword_depositor sword_slug \\\n", "eprintid \n", "1 {nan} {nan, 0} {nan} {nan} \n", "10 {nan} {nan, 0} {nan} {nan} \n", "1000 {nan} {0} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} \n", "\n", " exemplar home_page \\\n", "eprintid \n", "1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n", "10 {nan} {nan, http://www.diva-portal.org/mdh/} \n", "1000 {nan} {http://pam.pisharp.org/} \n", "10001 {nan} {nan, http://edoc.sub.uni-hamburg.de/klimawand... \n", "10008 {nan} {http://creativematter.skidmore.edu/} \n", "\n", " title \\\n", "eprintid \n", "1 {nan, @RCHIVESIC } \n", "10 {nan, Academic Archive On-line (Mälardalen Uni... \n", "1000 {PAM - Portuguese Archive of Mathematics} \n", "10001 {nan, Klimawandel Dokumentenserver} \n", "10008 {Creative Matter | Skidmore College Research} \n", "\n", " oai_pmh sword_endpoint \\\n", "eprintid \n", "1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n", "10 {http://www.diva-portal.org/oai/mdh/OAI, nan} {nan} \n", "1000 {nan} {nan} \n", "10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n", "10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n", "\n", " rss_feed twitter_feed \\\n", "eprintid \n", "1 {nan} {nan} \n", "10 {nan} {nan} \n", "1000 {nan} {nan} \n", "10001 {nan} {nan} \n", "10008 {http://creativematter.skidmore.edu/recent.rss} {nan} \n", "\n", " description fulltext \\\n", "eprintid \n", "1 {nan} {nan} \n", "10 {nan} {nan, TRUE} \n", "1000 {nan} {TRUE} \n", "10001 {The \"Documentenserver Klimawandel\" (Repositor... {nan, TRUE} \n", "10008 {Welcome to Creative Matter, a repository for ... 
{TRUE} \n", "\n", " open_access mandate \\\n", "eprintid \n", "1 {nan} {nan} \n", "10 {nan, TRUE} {nan} \n", "1000 {TRUE} {nan} \n", "10001 {nan, TRUE} {nan, TRUE} \n", "10008 {FALSE} {FALSE} \n", "\n", " organisation_title \\\n", "eprintid \n", "1 {nan} \n", "10 {nan} \n", "1000 {nan} \n", "10001 {KLIMZUG projects, nan, Climate Service Center... \n", "10008 {Skidmore College} \n", "\n", " organisation_home_page location_country \\\n", "eprintid \n", "1 {nan} {nan, fr} \n", "10 {nan} {se, nan} \n", "1000 {nan} {pt} \n", "10001 {http://www.climateservicecenter.de/, http://w... {nan, de} \n", "10008 {http://www.skidmore.edu/} {us} \n", "\n", " location_city location_latitude location_longitude \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan, Uppsala} {nan, 59.8667} {nan, 17.6333} \n", "1000 {Bellevue, WA} {47.6034} {-122.155} \n", "10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n", "10008 {Saratoga Springs} {43.0961} {-73.7818} \n", "\n", " software geoname version \\\n", "eprintid \n", "1 {nan, hal} {nan, geoname_2_FR} {nan, other} \n", "10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n", "1000 {dspace} {geoname_2_PT} {other} \n", "10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n", "10008 {bepress} {geoname_2_US} {other} \n", "\n", " subjects date note suggestions \\\n", "eprintid \n", "1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n", "10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n", "1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n", "10001 {GF, GE, G1, HD, S1} {nan, 2015-07-02 08:08:31} {nan} {nan} \n", "10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n", "\n", " activity_low activity_medium activity_high recordcount \\\n", "eprintid \n", "1 {nan, 0} {nan, 0} {nan, 0} {nan, 25} \n", "10 {nan, 0} {nan, 0} {nan, 0} {nan, 100} \n", "1000 {nan} {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} \n", "\n", " recordhistory fulltexts_total \\\n", "eprintid \n", "1 {nan, 
# Collapse all rows that share an eprintid into a single row; every column
# then holds the set of distinct values (NaN included) seen for that id.
roar_df = roar_df.groupby(by='eprintid').agg(set)
roar_df.head()
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
eprintid
1633archive1NaNNaNdisk0/00/00/00/012010-01-06 13:43:482011-07-18 05:40:072010-01-06 13:43:48subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://archivesic.ccsd.cnrs.fr/@RCHIVESIChttp://archivesic.ccsd.cnrs.fr/oai/oai.phpNaNNaNNaNNaNNaNNaNNaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN2002-05-17 19:24:41NaNNaN000250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...NaNNaNNaNNaN[celestial, opendoar][669, 58]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
10511archive1NaNNaNdisk0/00/00/00/102010-01-06 13:43:482011-07-18 05:40:132010-01-06 13:43:48institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://www.diva-portal.org/mdh/Academic Archive On-line (Mälardalen Universit...http://www.diva-portal.org/oai/mdh/OAINaNNaNNaNNaNTRUETRUENaNNaNNaNseUppsala59.866717.6333divageoname_2_SEotherNaN2005-12-08 13:15:22NaNNaN0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...NaNNaNNaNNaN[celestial, opendoar][526, 258]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1000274archive1NaNNaNdisk0/00/00/10/002010-01-06 13:45:012011-07-06 08:21:212010-01-06 13:45:01subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://pam.pisharp.org/PAM - Portuguese Archive of MathematicsNaNNaNNaNNaNNaNTRUETRUENaNNaNNaNptBellevue, WA47.6034-122.155dspacegeoname_2_PTotherNaN2006-05-04 10:48:14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1000120archive91NaNNaNdisk0/00/01/00/012015-08-08 14:52:112016-03-21 19:44:012015-08-08 14:52:11subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://edoc.sub.uni-hamburg.de/klimawandel/Klimawandel Dokumentenserverhttp://edoc.sub.uni-hamburg.de/klimawandel/oaiNaNNaNNaNThe \"Documentenserver Klimawandel\" (Repository...TRUETRUETRUE[KLIMZUG projects, Climate Service Center 2.0,...[http://www.climateservicecenter.de/, http://w...deHamburg53.55119.9937opusgeoname_2_DEother[GE, GF, G1, HD, S1]2015-07-02 08:08:31NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN[celestial, opendoar][5881, 3408]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1000811archive404NaNNaNdisk0/00/01/00/082015-08-08 14:52:262016-03-21 19:43:512015-08-08 14:52:26institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://creativematter.skidmore.edu/Creative Matter | Skidmore College Researchhttp://creativematter.skidmore.edu/do/oai/NaNhttp://creativematter.skidmore.edu/recent.rssNaNWelcome to Creative Matter, a repository for t...TRUEFALSEFALSESkidmore Collegehttp://www.skidmore.edu/usSaratoga Springs43.0961-73.7818bepressgeoname_2_USotherNaN2015-07-06 17:35:50NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial5882NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " rev_number eprint_status userid importid source dir \\\n", "eprintid \n", "1 633 archive 1 NaN NaN disk0/00/00/00/01 \n", "10 511 archive 1 NaN NaN disk0/00/00/00/10 \n", "1000 274 archive 1 NaN NaN disk0/00/00/10/00 \n", "10001 20 archive 91 NaN NaN disk0/00/01/00/01 \n", "10008 11 archive 404 NaN NaN disk0/00/01/00/08 \n", "\n", " datestamp lastmod status_changed \\\n", "eprintid \n", "1 2010-01-06 13:43:48 2011-07-18 05:40:07 2010-01-06 13:43:48 \n", "10 2010-01-06 13:43:48 2011-07-18 05:40:13 2010-01-06 13:43:48 \n", "1000 2010-01-06 13:45:01 2011-07-06 08:21:21 2010-01-06 13:45:01 \n", "10001 2015-08-08 14:52:11 2016-03-21 19:44:01 2015-08-08 14:52:11 \n", "10008 2015-08-08 14:52:26 2016-03-21 19:43:51 2015-08-08 14:52:26 \n", "\n", " type succeeds commentary metadata_visibility latitude \\\n", "eprintid \n", "1 subject NaN NaN show NaN \n", "10 institutional NaN NaN show NaN \n", "1000 subject NaN NaN show NaN \n", "10001 subject NaN NaN show NaN \n", "10008 institutional NaN NaN show NaN \n", "\n", " longitude relation_type relation_uri item_issues_id \\\n", "eprintid \n", "1 NaN NaN NaN NaN \n", "10 NaN NaN NaN NaN \n", "1000 NaN NaN NaN NaN \n", "10001 NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN \n", "\n", " item_issues_type item_issues_description item_issues_timestamp \\\n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN \n", "\n", " item_issues_status item_issues_reported_by item_issues_resolved_by \\\n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN \n", "\n", " item_issues_comment item_issues_count sword_depositor sword_slug \\\n", "eprintid \n", "1 NaN 0 NaN NaN \n", "10 NaN 0 NaN NaN \n", "1000 NaN 0 NaN NaN \n", "10001 NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN \n", "\n", " exemplar home_page \\\n", "eprintid \n", "1 NaN http://archivesic.ccsd.cnrs.fr/ \n", "10 NaN 
http://www.diva-portal.org/mdh/ \n", "1000 NaN http://pam.pisharp.org/ \n", "10001 NaN http://edoc.sub.uni-hamburg.de/klimawandel/ \n", "10008 NaN http://creativematter.skidmore.edu/ \n", "\n", " title \\\n", "eprintid \n", "1 @RCHIVESIC \n", "10 Academic Archive On-line (Mälardalen Universit... \n", "1000 PAM - Portuguese Archive of Mathematics \n", "10001 Klimawandel Dokumentenserver \n", "10008 Creative Matter | Skidmore College Research \n", "\n", " oai_pmh sword_endpoint \\\n", "eprintid \n", "1 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n", "10 http://www.diva-portal.org/oai/mdh/OAI NaN \n", "1000 NaN NaN \n", "10001 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n", "10008 http://creativematter.skidmore.edu/do/oai/ NaN \n", "\n", " rss_feed twitter_feed \\\n", "eprintid \n", "1 NaN NaN \n", "10 NaN NaN \n", "1000 NaN NaN \n", "10001 NaN NaN \n", "10008 http://creativematter.skidmore.edu/recent.rss NaN \n", "\n", " description fulltext \\\n", "eprintid \n", "1 NaN NaN \n", "10 NaN TRUE \n", "1000 NaN TRUE \n", "10001 The \"Documentenserver Klimawandel\" (Repository... TRUE \n", "10008 Welcome to Creative Matter, a repository for t... TRUE \n", "\n", " open_access mandate \\\n", "eprintid \n", "1 NaN NaN \n", "10 TRUE NaN \n", "1000 TRUE NaN \n", "10001 TRUE TRUE \n", "10008 FALSE FALSE \n", "\n", " organisation_title \\\n", "eprintid \n", "1 NaN \n", "10 NaN \n", "1000 NaN \n", "10001 [KLIMZUG projects, Climate Service Center 2.0,... \n", "10008 Skidmore College \n", "\n", " organisation_home_page location_country \\\n", "eprintid \n", "1 NaN fr \n", "10 NaN se \n", "1000 NaN pt \n", "10001 [http://www.climateservicecenter.de/, http://w... 
de \n", "10008 http://www.skidmore.edu/ us \n", "\n", " location_city location_latitude location_longitude software \\\n", "eprintid \n", "1 NaN NaN NaN hal \n", "10 Uppsala 59.8667 17.6333 diva \n", "1000 Bellevue, WA 47.6034 -122.155 dspace \n", "10001 Hamburg 53.5511 9.9937 opus \n", "10008 Saratoga Springs 43.0961 -73.7818 bepress \n", "\n", " geoname version subjects date \\\n", "eprintid \n", "1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n", "10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n", "1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n", "10001 geoname_2_DE other [GE, GF, G1, HD, S1] 2015-07-02 08:08:31 \n", "10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n", "\n", " note suggestions activity_low activity_medium activity_high \\\n", "eprintid \n", "1 NaN NaN 0 0 0 \n", "10 NaN NaN 0 0 0 \n", "1000 NaN NaN NaN NaN NaN \n", "10001 NaN NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN NaN \n", "\n", " recordcount recordhistory \\\n", "eprintid \n", "1 25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "10 100 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... 
def value_or_list(cell_set):
    """Collapse the values aggregated for one cell into a scalar or a list.

    Parameters
    ----------
    cell_set : iterable of hashable
        Values collected for a single cell (the notebook passes the sets
        built by ``groupby(...).aggregate(set)``). The input is not modified.

    Returns
    -------
    object
        ``np.nan`` when no non-missing value remains, the value itself when
        exactly one remains, otherwise a list of the distinct non-missing
        values in a deterministic order.
    """
    # Drop every missing scalar, not just the ``np.nan`` singleton: NaN != NaN,
    # so ``set.discard(np.nan)`` only removes that one object and lets other
    # NaN instances (e.g. ``float('nan')``) slip through. ``is_scalar`` keeps
    # ``pd.isna`` from returning an array for tuple-like elements.
    values = {
        value for value in cell_set
        if not (pd.api.types.is_scalar(value) and pd.isna(value))
    }
    if not values:
        return np.nan
    if len(values) == 1:
        return values.pop()
    # Set iteration order for strings changes between kernel sessions; sort so
    # that re-running the notebook yields the same lists.
    try:
        return sorted(values)
    except TypeError:
        # Mixed, mutually unorderable types: group by type name, then by repr.
        return sorted(values, key=lambda value: (type(value).__name__, repr(value)))
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
4259606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaN[roarmap, celestial, opendoar][69, 166, 1106]NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "4259 606 657 archive 1 NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "4259 disk0/00/00/06/06 2010-01-06 13:44:31 2016-04-17 21:53:14 \n", "\n", " status_changed type succeeds commentary metadata_visibility \\\n", "4259 2010-01-06 13:44:31 subject NaN NaN show \n", "\n", " latitude longitude relation_type relation_uri item_issues_id \\\n", "4259 NaN NaN NaN NaN NaN \n", "\n", " item_issues_type item_issues_description item_issues_timestamp \\\n", "4259 NaN NaN NaN \n", "\n", " item_issues_status item_issues_reported_by item_issues_resolved_by \\\n", "4259 NaN NaN NaN \n", "\n", " item_issues_comment item_issues_count sword_depositor sword_slug \\\n", "4259 NaN 0 NaN NaN \n", "\n", " exemplar home_page title \\\n", "4259 NaN http://hal.archives-ouvertes.fr/ HAL: Hyper Article en Ligne \n", "\n", " oai_pmh sword_endpoint rss_feed \\\n", "4259 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n", "\n", " twitter_feed description fulltext open_access mandate organisation_title \\\n", "4259 NaN NaN TRUE TRUE NaN NaN \n", "\n", " organisation_home_page location_country location_city location_latitude \\\n", "4259 NaN fr NaN NaN \n", "\n", " location_longitude software geoname version subjects \\\n", "4259 NaN hal geoname_2_FR other NaN \n", "\n", " date note suggestions activity_low activity_medium \\\n", "4259 1998-11-02 11:53:57 NaN NaN 0 0 \n", "\n", " activity_high recordcount \\\n", "4259 2 675816 \n", "\n", " recordhistory fulltexts_total \\\n", "4259 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... 
# Turn the 'eprintid' index back into a regular column. The guard keeps the
# cell idempotent: re-running it on an already-reset frame would otherwise
# insert a spurious 'index' column.
if roar_df.index.name == 'eprintid':
    roar_df = roar_df.reset_index()

# Spot-check a single repository; eprintid values are compared as strings here.
roar_df.loc[roar_df['eprintid'] == '606']
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
count54445444544454440.00.0544454445444544454441080.054440.00.00.00.063636363630.00.00.022420.00.026854375442433217815381163837419741973746446042865138371437253708470047305444128954292181892288228822882290228827025827025846054580375205205148148148148148756756223
unique544466012189NaNNaN544441984043423012108NaN2NaNNaNNaNNaN4856243NaNNaNNaN4NaNNaN25271514340591721485112335922238583831144188429232953311265393748982101737254167411702135118134117742607111481481481461433463423
top111archive1NaNNaNdisk0/00/00/00/012010-01-06 13:43:482011-07-06 08:24:532010-01-06 13:43:48institutional10164NaNshowNaNNaNNaNNaNbad_oai_pmh_url_0duplicate_titleDuplicate title to <xhtml:table xmlns:xhtml=\"h...2010-01-13 10:44:49discoveredNaNNaNNaN0NaNNaNFALSEhttp://eprints.upnjatim.ac.id/Repositorio Institucionalhttp://kce.docressources.info/ws/PMBWs_2http://producao.usp.br/sword/servicedocumenthttp://eprints.upnjatim.ac.id/cgi/latest_tool?...http://my.indexcopernicus.com/fredemorenoinfo:other:archives.eprints.org:importTRUETRUEFALSEChinese Academy of Science (中国科学院)http://www.cas.cn/usLima34.1607-118.139dspacegeoname_2_USotherK12006-05-04 10:48:14¿Quién puede depositar documentos en el reposi...This repository is hosted by the Texas Digital...0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0000[celestial, opendoar][1879, 2246][roarmap, celestial, opendoar]opendoar2021-01-252446208248060,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0russell_group
freq133354441330NaNNaN11681638531NaN5402NaNNaNNaNNaN153324538NaNNaNNaN2201NaNNaN2614742521122805269627489989174252523418454841539929201220742210730951131141131142106411920520511135387387130
meanNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
stdNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
minNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
25%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
50%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
75%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
maxNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "count 5444 5444 5444 5444 0.0 0.0 \n", "unique 5444 660 1 2189 NaN NaN \n", "top 1 11 archive 1 NaN NaN \n", "freq 1 333 5444 1330 NaN NaN \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "count 5444 5444 5444 \n", "unique 5444 4198 4043 \n", "top disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-06 08:24:53 \n", "freq 1 16 8 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " status_changed type succeeds commentary \\\n", "count 5444 5444 108 0.0 \n", "unique 4230 12 108 NaN \n", "top 2010-01-06 13:43:48 institutional 10164 NaN \n", "freq 16 3853 1 NaN \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "count 5444 0.0 0.0 0.0 0.0 \n", "unique 2 NaN NaN NaN NaN \n", "top show NaN NaN NaN NaN \n", "freq 5402 NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type \\\n", "count 63 63 \n", "unique 48 5 \n", "top bad_oai_pmh_url_0 duplicate_title \n", "freq 15 33 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " item_issues_description \\\n", "count 63 \n", "unique 62 \n", "top Duplicate title to