{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading datasets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
0921517archive1NaNNaNdisk0/00/00/09/212010-01-06 13:44:552016-04-17 21:54:112010-01-06 13:44:55thesesNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNFALSEhttp://alcme.oclc.org/ndltd/index.htmlNetworked Digital Library of Theses and Disser...http://alcme.oclc.org/ndltd/servlet/OAIHandlerNaNNaNNaNNaNTRUETRUENaNNaNNaNusNaNNaNNaNetddbgeoname_2_USotherNaN2006-04-18NaNNaN00015334000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5...NaNNaNNaNNaNcelestial5NaNNaNNaNNaNNaNNaNNaNNaN0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0NaN
11489556archive1NaNNaNdisk0/00/00/14/892010-01-06 13:46:052016-04-17 21:49:002010-01-06 13:46:05otherNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://prensahistorica.mcu.es/prensahistorica/...Virtual Library of Historical Presshttp://prensahistorica.mcu.es/prensahistorica/...NaNNaNNaNThe Virtual Library of Historical Press is the...TRUETRUENaNNaNNaNesNaNNaNNaNothergeoname_2_ESotherNaN2006-04-04 00:00:00NaNNaN00010084860,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761...NaNNaNNaNNaNcelestial858NaNNaNNaNNaNNaNNaNNaNNaN0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...47500NaN
2606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaNopendoar166NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
3606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial1106NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNroarmap69NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source dir \\\n", "0 921 517 archive 1 NaN NaN disk0/00/00/09/21 \n", "1 1489 556 archive 1 NaN NaN disk0/00/00/14/89 \n", "2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n", "3 606 NaN NaN NaN NaN NaN NaN \n", "4 606 NaN NaN NaN NaN NaN NaN \n", "\n", " datestamp lastmod status_changed type \\\n", "0 2010-01-06 13:44:55 2016-04-17 21:54:11 2010-01-06 13:44:55 theses \n", "1 2010-01-06 13:46:05 2016-04-17 21:49:00 2010-01-06 13:46:05 other \n", "2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " succeeds commentary metadata_visibility latitude longitude relation_type \\\n", "0 NaN NaN show NaN NaN NaN \n", "1 NaN NaN show NaN NaN NaN \n", "2 NaN NaN show NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " relation_uri item_issues_id item_issues_type item_issues_description \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "0 NaN NaN 0 \n", "1 NaN NaN 0 \n", "2 NaN NaN 0 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar \\\n", "0 NaN NaN FALSE \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " home_page \\\n", "0 http://alcme.oclc.org/ndltd/index.html \n", "1 http://prensahistorica.mcu.es/prensahistorica/... \n", "2 http://hal.archives-ouvertes.fr/ \n", "3 NaN \n", "4 NaN \n", "\n", " title \\\n", "0 Networked Digital Library of Theses and Disser... \n", "1 Virtual Library of Historical Press \n", "2 HAL: Hyper Article en Ligne \n", "3 NaN \n", "4 NaN \n", "\n", " oai_pmh sword_endpoint rss_feed \\\n", "0 http://alcme.oclc.org/ndltd/servlet/OAIHandler NaN NaN \n", "1 http://prensahistorica.mcu.es/prensahistorica/... NaN NaN \n", "2 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " twitter_feed description fulltext \\\n", "0 NaN NaN TRUE \n", "1 NaN The Virtual Library of Historical Press is the... TRUE \n", "2 NaN NaN TRUE \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " open_access mandate organisation_title organisation_home_page \\\n", "0 TRUE NaN NaN NaN \n", "1 TRUE NaN NaN NaN \n", "2 TRUE NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " location_country location_city location_latitude location_longitude \\\n", "0 us NaN NaN NaN \n", "1 es NaN NaN NaN \n", "2 fr NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " software geoname version subjects date note \\\n", "0 etddb geoname_2_US other NaN 2006-04-18 NaN \n", "1 other geoname_2_ES other NaN 2006-04-04 00:00:00 NaN \n", "2 hal geoname_2_FR other NaN 1998-11-02 11:53:57 NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " suggestions activity_low activity_medium activity_high recordcount \\\n", "0 NaN 0 0 0 1533400 \n", "1 NaN 0 0 0 1008486 \n", "2 NaN 0 0 2 675816 \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,94862,168074,5... NaN \n", "1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1431,1431,1761... NaN \n", "2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n", "0 NaN NaN NaN celestial 5 \n", "1 NaN NaN NaN celestial 858 \n", "2 NaN NaN NaN opendoar 166 \n", "3 NaN NaN NaN celestial 1106 \n", "4 NaN NaN NaN roarmap 69 \n", "\n", " submit_to submitted_to_name submitted_to_done webometrics_rank \\\n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN \n", "2 NaN NaN NaN 1 \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " webometrics_size webometrics_visibility webometrics_rich_files \\\n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 6 1 1 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " webometrics_scholar monthly_deposits \\\n", "0 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "1 NaN 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " total_deposits association \n", "0 0 NaN \n", "1 47500 NaN \n", "2 3063 NaN \n", "3 NaN NaN \n", "4 NaN NaN " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n", "roar_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['eprintid', 'rev_number', 'eprint_status', 'userid', 'importid',\n", " 'source', 'dir', 'datestamp', 'lastmod', 'status_changed', 'type',\n", " 'succeeds', 'commentary', 'metadata_visibility', 'latitude',\n", " 'longitude', 'relation_type', 'relation_uri', 'item_issues_id',\n", " 'item_issues_type', 'item_issues_description', 'item_issues_timestamp',\n", " 'item_issues_status', 'item_issues_reported_by',\n", " 'item_issues_resolved_by', 'item_issues_comment', 'item_issues_count',\n", " 'sword_depositor', 'sword_slug', 'exemplar', 'home_page', 'title',\n", " 'oai_pmh', 'sword_endpoint', 'rss_feed', 'twitter_feed', 'description',\n", " 'fulltext', 'open_access', 'mandate', 'organisation_title',\n", " 'organisation_home_page', 'location_country', 'location_city',\n", " 'location_latitude', 'location_longitude', 'software', 'geoname',\n", " 'version', 'subjects', 'date', 'note', 'suggestions', 'activity_low',\n", " 'activity_medium', 'activity_high', 'recordcount', 'recordhistory',\n", " 'fulltexts_total', 'fulltexts_docs', 'fulltexts_rtotal',\n", " 'fulltexts_rdocs', 'registry_name', 'registry_id', 'submit_to',\n", " 'submitted_to_name', 'submitted_to_done', 'webometrics_rank',\n", " 'webometrics_size', 'webometrics_visibility', 'webometrics_rich_files',\n", " 'webometrics_scholar', 'monthly_deposits', 'total_deposits',\n", " 'association'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
count1758953755375537500537553755375537553751070537500008686868686000224500265536853734468176152111537824127412736765182489852053774375237344637467153751052453602151872291229122912293229127025827025873987266567205205148148148148148756756231
unique53756581213500537541273966415812107020000513683200040025202507641791701468111330422244374328136187529272965311265323748302071717254167411704135118134117351663111481481481461433463422
top1015911archive1NaNNaNdisk0/00/00/14/072010-01-06 13:43:482011-07-06 08:24:532010-01-06 13:43:48institutional12637NaNshowNaNNaNNaNNaNbad_oai_pmh_url_0duplicate_titleSimilar title to <xhtml:table xmlns:xhtml=\"htt...2010-01-13 10:44:49discoveredNaNNaNNaN0NaNNaNFALSEhttp://eprints.upnjatim.ac.id/Repositorio Institucionalhttp://virtuelcampus.univ-msila.dz/fllhttp://npl.csircentral.net/http://eprints.upnjatim.ac.id/cgi/latest_tool?...https://twitter.com/rpsicomdp?lang=esinfo:other:archives.eprints.org:importTRUETRUEFALSEChinese Academy of Science (中国科学院)http://www.cas.cn/usLima34.1607-118.139dspacegeoname_2_USotherL12006-05-04 10:48:14¿Quién puede depositar documentos en el reposi...This repository is hosted by the Texas Digital...0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0000celestial184celestialopendoar2021-01-253677383628248060,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0russell_group
freq23833253751333NaNNaN11681637951NaN5334NaNNaNNaNNaN184736853NaNNaNNaN2204NaNNaN25847525211227582652269999902702525230784047713489929201520772213733951131141131143758721620520511135387387141
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "count 17589 5375 5375 5375 0 0 \n", "unique 5375 658 1 2135 0 0 \n", "top 10159 11 archive 1 NaN NaN \n", "freq 238 332 5375 1333 NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "count 5375 5375 5375 \n", "unique 5375 4127 3966 \n", "top disk0/00/00/14/07 2010-01-06 13:43:48 2011-07-06 08:24:53 \n", "freq 1 16 8 \n", "\n", " status_changed type succeeds commentary \\\n", "count 5375 5375 107 0 \n", "unique 4158 12 107 0 \n", "top 2010-01-06 13:43:48 institutional 12637 NaN \n", "freq 16 3795 1 NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "count 5375 0 0 0 0 \n", "unique 2 0 0 0 0 \n", "top show NaN NaN NaN NaN \n", "freq 5334 NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type \\\n", "count 86 86 \n", "unique 51 3 \n", "top bad_oai_pmh_url_0 duplicate_title \n", "freq 18 47 \n", "\n", " item_issues_description \\\n", "count 86 \n", "unique 68 \n", "top Similar title to \n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
2606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaNopendoar166NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
3606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial1106NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNroarmap69NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source dir \\\n", "2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n", "3 606 NaN NaN NaN NaN NaN NaN \n", "4 606 NaN NaN NaN NaN NaN NaN \n", "\n", " datestamp lastmod status_changed type \\\n", "2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " succeeds commentary metadata_visibility latitude longitude relation_type \\\n", "2 NaN NaN show NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " relation_uri item_issues_id item_issues_type item_issues_description \\\n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "2 NaN NaN 0 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar home_page \\\n", "2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " title oai_pmh \\\n", "2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n", "2 NaN NaN NaN NaN TRUE TRUE \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " mandate organisation_title organisation_home_page location_country \\\n", "2 NaN NaN NaN fr \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " location_city location_latitude location_longitude software geoname \\\n", "2 NaN NaN NaN hal geoname_2_FR \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " version subjects date note suggestions activity_low \\\n", "2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " activity_medium activity_high recordcount \\\n", "2 0 2 675816 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n", "2 NaN NaN NaN opendoar 166 \n", "3 NaN NaN NaN celestial 1106 \n", "4 NaN NaN NaN roarmap 69 \n", "\n", " submit_to submitted_to_name submitted_to_done webometrics_rank \\\n", "2 NaN NaN NaN 1 \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " webometrics_size webometrics_visibility webometrics_rich_files \\\n", "2 6 1 1 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " webometrics_scholar monthly_deposits \\\n", "2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " total_deposits association \n", "2 3063 NaN \n", "3 NaN NaN \n", "4 NaN NaN " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df[roar_df.eprintid == '606']" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
2606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaNopendoar166NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
3606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial1106NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4606NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNroarmap69NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source dir \\\n", "2 606 657 archive 1 NaN NaN disk0/00/00/06/06 \n", "3 606 NaN NaN NaN NaN NaN NaN \n", "4 606 NaN NaN NaN NaN NaN NaN \n", "\n", " datestamp lastmod status_changed type \\\n", "2 2010-01-06 13:44:31 2016-04-17 21:53:14 2010-01-06 13:44:31 subject \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " succeeds commentary metadata_visibility latitude longitude relation_type \\\n", "2 NaN NaN show NaN NaN NaN \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " relation_uri item_issues_id item_issues_type item_issues_description \\\n", "2 NaN NaN NaN NaN \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "2 NaN NaN NaN \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "2 NaN NaN 0 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar home_page \\\n", "2 NaN NaN NaN http://hal.archives-ouvertes.fr/ \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " title oai_pmh \\\n", "2 HAL: Hyper Article en Ligne http://hal.archives-ouvertes.fr/oai/oai.php \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " sword_endpoint rss_feed twitter_feed description fulltext open_access \\\n", "2 NaN NaN NaN NaN TRUE TRUE \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " mandate organisation_title organisation_home_page location_country \\\n", "2 NaN NaN NaN fr \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " location_city location_latitude location_longitude software geoname \\\n", "2 NaN NaN NaN hal geoname_2_FR \n", "3 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "\n", " version subjects date note suggestions activity_low \\\n", "2 other NaN 1998-11-02 11:53:57 NaN NaN 0 \n", "3 NaN NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN NaN \n", "\n", " activity_medium activity_high recordcount \\\n", "2 0 2 675816 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "2 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name registry_id \\\n", "2 NaN NaN NaN opendoar 166 \n", "3 NaN NaN NaN celestial 1106 \n", "4 NaN NaN NaN roarmap 69 \n", "\n", " submit_to submitted_to_name submitted_to_done webometrics_rank \\\n", "2 NaN NaN NaN 1 \n", "3 NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN \n", "\n", " webometrics_size webometrics_visibility webometrics_rich_files \\\n", "2 6 1 1 \n", "3 NaN NaN NaN \n", "4 NaN NaN NaN \n", "\n", " webometrics_scholar monthly_deposits \\\n", "2 1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "3 NaN NaN \n", "4 NaN NaN \n", "\n", " total_deposits association \n", "2 3063 NaN \n", "3 NaN NaN \n", "4 NaN NaN " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df[roar_df.eprintid == '606']" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
eprintid
1{nan, 633}{nan, archive}{nan, 1}{nan}{nan}{nan, disk0/00/00/00/01}{nan, 2010-01-06 13:43:48}{nan, 2011-07-18 05:40:07}{nan, 2010-01-06 13:43:48}{nan, subject}{nan}{nan}{nan, show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, 0}{nan}{nan}{nan}{nan, http://archivesic.ccsd.cnrs.fr/}{@RCHIVESIC , nan}{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, fr}{nan}{nan}{nan}{nan, hal}{geoname_2_FR, nan}{nan, other}{nan}{nan, 2002-05-17 19:24:41}{nan}{nan}{nan, 0}{nan, 0}{nan, 0}{nan, 25}{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0...{nan}{nan}{nan}{nan}{celestial, opendoar}{58, 669}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
10{nan, 511}{nan, archive}{nan, 1}{nan}{nan}{nan, disk0/00/00/00/10}{nan, 2010-01-06 13:43:48}{nan, 2011-07-18 05:40:13}{nan, 2010-01-06 13:43:48}{nan, institutional}{nan}{nan}{nan, show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, 0}{nan}{nan}{nan}{nan, http://www.diva-portal.org/mdh/}{nan, Academic Archive On-line (Mälardalen Uni...{nan, http://www.diva-portal.org/oai/mdh/OAI}{nan}{nan}{nan}{nan}{nan, TRUE}{nan, TRUE}{nan}{nan}{nan}{nan, se}{nan, Uppsala}{nan, 59.8667}{nan, 17.6333}{nan, diva}{nan, geoname_2_SE}{nan, other}{nan}{nan, 2005-12-08 13:15:22}{nan}{nan}{nan, 0}{nan, 0}{nan, 0}{nan, 100}{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1...{nan}{nan}{nan}{nan}{celestial, opendoar}{258, 526}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
1000{274}{archive}{1}{nan}{nan}{disk0/00/00/10/00}{2010-01-06 13:45:01}{2011-07-06 08:21:21}{2010-01-06 13:45:01}{subject}{nan}{nan}{show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{0}{nan}{nan}{nan}{http://pam.pisharp.org/}{PAM - Portuguese Archive of Mathematics}{nan}{nan}{nan}{nan}{nan}{TRUE}{TRUE}{nan}{nan}{nan}{pt}{Bellevue, WA}{47.6034}{-122.155}{dspace}{geoname_2_PT}{other}{nan}{2006-05-04 10:48:14}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
10001{nan, 20}{nan, archive}{nan, 91}{nan}{nan}{nan, disk0/00/01/00/01}{nan, 2015-08-08 14:52:11}{nan, 2016-03-21 19:44:01}{nan, 2015-08-08 14:52:11}{nan, subject}{nan}{nan}{nan, show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{http://edoc.sub.uni-hamburg.de/klimawandel/, ...{nan, Klimawandel Dokumentenserver}{nan, http://edoc.sub.uni-hamburg.de/klimawand...{nan}{nan}{nan}{nan, The \"Documentenserver Klimawandel\" (Repo...{nan, TRUE}{nan, TRUE}{nan, TRUE}{nan, KLIMZUG projects, Helmholtz-Zentrum Gees...{http://www.climateservicecenter.de/, nan, htt...{nan, de}{nan, Hamburg}{nan, 53.5511}{nan, 9.9937}{nan, opus}{nan, geoname_2_DE}{nan, other}{S1, HD, GF, GE, G1}{nan, 2015-07-02 08:08:31}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan, celestial, opendoar}{3408, 5881, nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
10008{11}{archive}{404}{nan}{nan}{disk0/00/01/00/08}{2015-08-08 14:52:26}{2016-03-21 19:43:51}{2015-08-08 14:52:26}{institutional}{nan}{nan}{show}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{http://creativematter.skidmore.edu/}{Creative Matter | Skidmore College Research}{http://creativematter.skidmore.edu/do/oai/}{nan}{http://creativematter.skidmore.edu/recent.rss}{nan}{Welcome to Creative Matter, a repository for ...{TRUE}{FALSE}{FALSE}{Skidmore College}{http://www.skidmore.edu/}{us}{Saratoga Springs}{43.0961}{-73.7818}{bepress}{geoname_2_US}{other}{nan}{2015-07-06 17:35:50}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{celestial}{5882}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}{nan}
\n", "
" ], "text/plain": [ " rev_number eprint_status userid importid source \\\n", "eprintid \n", "1 {nan, 633} {nan, archive} {nan, 1} {nan} {nan} \n", "10 {nan, 511} {nan, archive} {nan, 1} {nan} {nan} \n", "1000 {274} {archive} {1} {nan} {nan} \n", "10001 {nan, 20} {nan, archive} {nan, 91} {nan} {nan} \n", "10008 {11} {archive} {404} {nan} {nan} \n", "\n", " dir datestamp \\\n", "eprintid \n", "1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n", "10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n", "1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n", "10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n", "10008 {disk0/00/01/00/08} {2015-08-08 14:52:26} \n", "\n", " lastmod status_changed \\\n", "eprintid \n", "1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n", "10 {nan, 2011-07-18 05:40:13} {nan, 2010-01-06 13:43:48} \n", "1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n", "10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n", "10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n", "\n", " type succeeds commentary metadata_visibility \\\n", "eprintid \n", "1 {nan, subject} {nan} {nan} {nan, show} \n", "10 {nan, institutional} {nan} {nan} {nan, show} \n", "1000 {subject} {nan} {nan} {show} \n", "10001 {nan, subject} {nan} {nan} {nan, show} \n", "10008 {institutional} {nan} {nan} {show} \n", "\n", " latitude longitude relation_type relation_uri item_issues_id \\\n", "eprintid \n", "1 {nan} {nan} {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} {nan} \n", "\n", " item_issues_type item_issues_description item_issues_timestamp \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " item_issues_status item_issues_reported_by item_issues_resolved_by \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " item_issues_comment item_issues_count sword_depositor sword_slug \\\n", "eprintid \n", "1 {nan} {nan, 0} {nan} {nan} \n", "10 {nan} {nan, 0} {nan} {nan} \n", "1000 {nan} {0} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} \n", "\n", " exemplar home_page \\\n", "eprintid \n", "1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n", "10 {nan} {nan, http://www.diva-portal.org/mdh/} \n", "1000 {nan} {http://pam.pisharp.org/} \n", "10001 {nan} {http://edoc.sub.uni-hamburg.de/klimawandel/, ... \n", "10008 {nan} {http://creativematter.skidmore.edu/} \n", "\n", " title \\\n", "eprintid \n", "1 {@RCHIVESIC , nan} \n", "10 {nan, Academic Archive On-line (Mälardalen Uni... \n", "1000 {PAM - Portuguese Archive of Mathematics} \n", "10001 {nan, Klimawandel Dokumentenserver} \n", "10008 {Creative Matter | Skidmore College Research} \n", "\n", " oai_pmh sword_endpoint \\\n", "eprintid \n", "1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n", "10 {nan, http://www.diva-portal.org/oai/mdh/OAI} {nan} \n", "1000 {nan} {nan} \n", "10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n", "10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n", "\n", " rss_feed twitter_feed \\\n", "eprintid \n", "1 {nan} {nan} \n", "10 {nan} {nan} \n", "1000 {nan} {nan} \n", "10001 {nan} {nan} \n", "10008 {http://creativematter.skidmore.edu/recent.rss} {nan} \n", "\n", " description fulltext \\\n", "eprintid \n", "1 {nan} {nan} \n", "10 {nan} {nan, TRUE} \n", "1000 {nan} {TRUE} \n", "10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {nan, TRUE} \n", "10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n", "\n", " open_access mandate \\\n", "eprintid \n", "1 {nan} {nan} \n", "10 {nan, TRUE} {nan} \n", "1000 {TRUE} {nan} \n", "10001 {nan, TRUE} {nan, TRUE} \n", "10008 {FALSE} {FALSE} \n", "\n", " organisation_title \\\n", "eprintid \n", "1 {nan} \n", "10 {nan} \n", "1000 {nan} \n", "10001 {nan, KLIMZUG projects, Helmholtz-Zentrum Gees... \n", "10008 {Skidmore College} \n", "\n", " organisation_home_page location_country \\\n", "eprintid \n", "1 {nan} {nan, fr} \n", "10 {nan} {nan, se} \n", "1000 {nan} {pt} \n", "10001 {http://www.climateservicecenter.de/, nan, htt... {nan, de} \n", "10008 {http://www.skidmore.edu/} {us} \n", "\n", " location_city location_latitude location_longitude \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan, Uppsala} {nan, 59.8667} {nan, 17.6333} \n", "1000 {Bellevue, WA} {47.6034} {-122.155} \n", "10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n", "10008 {Saratoga Springs} {43.0961} {-73.7818} \n", "\n", " software geoname version \\\n", "eprintid \n", "1 {nan, hal} {geoname_2_FR, nan} {nan, other} \n", "10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n", "1000 {dspace} {geoname_2_PT} {other} \n", "10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n", "10008 {bepress} {geoname_2_US} {other} \n", "\n", " subjects date note suggestions \\\n", "eprintid \n", "1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n", "10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n", "1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n", "10001 {S1, HD, GF, GE, G1} {nan, 2015-07-02 08:08:31} {nan} {nan} \n", "10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n", "\n", " activity_low activity_medium activity_high recordcount \\\n", "eprintid \n", "1 {nan, 0} {nan, 0} {nan, 0} {nan, 25} \n", "10 {nan, 0} {nan, 0} {nan, 0} {nan, 100} \n", "1000 {nan} {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} \n", "\n", " recordhistory fulltexts_total \\\n", "eprintid \n", "1 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0... {nan} \n", "10 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1... {nan} \n", "1000 {nan} {nan} \n", "10001 {nan} {nan} \n", "10008 {nan} {nan} \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " registry_name registry_id submit_to \\\n", "eprintid \n", "1 {celestial, opendoar} {58, 669} {nan} \n", "10 {celestial, opendoar} {258, 526} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan, celestial, opendoar} {3408, 5881, nan} {nan} \n", "10008 {celestial} {5882} {nan} \n", "\n", " submitted_to_name submitted_to_done webometrics_rank \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " webometrics_size webometrics_visibility webometrics_rich_files \\\n", "eprintid \n", "1 {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} \n", "\n", " webometrics_scholar monthly_deposits total_deposits association \n", "eprintid \n", "1 {nan} {nan} {nan} {nan} \n", "10 {nan} {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} {nan} \n", "10001 {nan} {nan} {nan} {nan} \n", "10008 {nan} {nan} {nan} {nan} " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df = roar_df.groupby('eprintid').aggregate(set)\n", "roar_df.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
rev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
eprintid
1633archive1NaNNaNdisk0/00/00/00/012010-01-06 13:43:482011-07-18 05:40:072010-01-06 13:43:48subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://archivesic.ccsd.cnrs.fr/@RCHIVESIChttp://archivesic.ccsd.cnrs.fr/oai/oai.phpNaNNaNNaNNaNNaNNaNNaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN2002-05-17 19:24:41NaNNaN000250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...NaNNaNNaNNaN[celestial, opendoar][58, 669]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
10511archive1NaNNaNdisk0/00/00/00/102010-01-06 13:43:482011-07-18 05:40:132010-01-06 13:43:48institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://www.diva-portal.org/mdh/Academic Archive On-line (Mälardalen Universit...http://www.diva-portal.org/oai/mdh/OAINaNNaNNaNNaNTRUETRUENaNNaNNaNseUppsala59.866717.6333divageoname_2_SEotherNaN2005-12-08 13:15:22NaNNaN0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...NaNNaNNaNNaN[celestial, opendoar][258, 526]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1000274archive1NaNNaNdisk0/00/00/10/002010-01-06 13:45:012011-07-06 08:21:212010-01-06 13:45:01subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://pam.pisharp.org/PAM - Portuguese Archive of MathematicsNaNNaNNaNNaNNaNTRUETRUENaNNaNNaNptBellevue, WA47.6034-122.155dspacegeoname_2_PTotherNaN2006-05-04 10:48:14NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1000120archive91NaNNaNdisk0/00/01/00/012015-08-08 14:52:112016-03-21 19:44:012015-08-08 14:52:11subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://edoc.sub.uni-hamburg.de/klimawandel/Klimawandel Dokumentenserverhttp://edoc.sub.uni-hamburg.de/klimawandel/oaiNaNNaNNaNThe \"Documentenserver Klimawandel\" (Repository...TRUETRUETRUE[KLIMZUG projects, Helmholtz-Zentrum Geesthach...[http://www.climateservicecenter.de/, http://w...deHamburg53.55119.9937opusgeoname_2_DEother[S1, GE, HD, GF, G1]2015-07-02 08:08:31NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN[celestial, opendoar][3408, 5881]NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1000811archive404NaNNaNdisk0/00/01/00/082015-08-08 14:52:262016-03-21 19:43:512015-08-08 14:52:26institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://creativematter.skidmore.edu/Creative Matter | Skidmore College Researchhttp://creativematter.skidmore.edu/do/oai/NaNhttp://creativematter.skidmore.edu/recent.rssNaNWelcome to Creative Matter, a repository for t...TRUEFALSEFALSESkidmore Collegehttp://www.skidmore.edu/usSaratoga Springs43.0961-73.7818bepressgeoname_2_USotherNaN2015-07-06 17:35:50NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial5882NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " rev_number eprint_status userid importid source dir \\\n", "eprintid \n", "1 633 archive 1 NaN NaN disk0/00/00/00/01 \n", "10 511 archive 1 NaN NaN disk0/00/00/00/10 \n", "1000 274 archive 1 NaN NaN disk0/00/00/10/00 \n", "10001 20 archive 91 NaN NaN disk0/00/01/00/01 \n", "10008 11 archive 404 NaN NaN disk0/00/01/00/08 \n", "\n", " datestamp lastmod status_changed \\\n", "eprintid \n", "1 2010-01-06 13:43:48 2011-07-18 05:40:07 2010-01-06 13:43:48 \n", "10 2010-01-06 13:43:48 2011-07-18 05:40:13 2010-01-06 13:43:48 \n", "1000 2010-01-06 13:45:01 2011-07-06 08:21:21 2010-01-06 13:45:01 \n", "10001 2015-08-08 14:52:11 2016-03-21 19:44:01 2015-08-08 14:52:11 \n", "10008 2015-08-08 14:52:26 2016-03-21 19:43:51 2015-08-08 14:52:26 \n", "\n", " type succeeds commentary metadata_visibility latitude \\\n", "eprintid \n", "1 subject NaN NaN show NaN \n", "10 institutional NaN NaN show NaN \n", "1000 subject NaN NaN show NaN \n", "10001 subject NaN NaN show NaN \n", "10008 institutional NaN NaN show NaN \n", "\n", " longitude relation_type relation_uri item_issues_id \\\n", "eprintid \n", "1 NaN NaN NaN NaN \n", "10 NaN NaN NaN NaN \n", "1000 NaN NaN NaN NaN \n", "10001 NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN \n", "\n", " item_issues_type item_issues_description item_issues_timestamp \\\n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN \n", "\n", " item_issues_status item_issues_reported_by item_issues_resolved_by \\\n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN \n", "\n", " item_issues_comment item_issues_count sword_depositor sword_slug \\\n", "eprintid \n", "1 NaN 0 NaN NaN \n", "10 NaN 0 NaN NaN \n", "1000 NaN 0 NaN NaN \n", "10001 NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN \n", "\n", " exemplar home_page \\\n", "eprintid \n", "1 NaN http://archivesic.ccsd.cnrs.fr/ \n", "10 NaN http://www.diva-portal.org/mdh/ \n", "1000 NaN http://pam.pisharp.org/ \n", "10001 NaN http://edoc.sub.uni-hamburg.de/klimawandel/ \n", "10008 NaN http://creativematter.skidmore.edu/ \n", "\n", " title \\\n", "eprintid \n", "1 @RCHIVESIC \n", "10 Academic Archive On-line (Mälardalen Universit... \n", "1000 PAM - Portuguese Archive of Mathematics \n", "10001 Klimawandel Dokumentenserver \n", "10008 Creative Matter | Skidmore College Research \n", "\n", " oai_pmh sword_endpoint \\\n", "eprintid \n", "1 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n", "10 http://www.diva-portal.org/oai/mdh/OAI NaN \n", "1000 NaN NaN \n", "10001 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n", "10008 http://creativematter.skidmore.edu/do/oai/ NaN \n", "\n", " rss_feed twitter_feed \\\n", "eprintid \n", "1 NaN NaN \n", "10 NaN NaN \n", "1000 NaN NaN \n", "10001 NaN NaN \n", "10008 http://creativematter.skidmore.edu/recent.rss NaN \n", "\n", " description fulltext \\\n", "eprintid \n", "1 NaN NaN \n", "10 NaN TRUE \n", "1000 NaN TRUE \n", "10001 The \"Documentenserver Klimawandel\" (Repository... TRUE \n", "10008 Welcome to Creative Matter, a repository for t... TRUE \n", "\n", " open_access mandate \\\n", "eprintid \n", "1 NaN NaN \n", "10 TRUE NaN \n", "1000 TRUE NaN \n", "10001 TRUE TRUE \n", "10008 FALSE FALSE \n", "\n", " organisation_title \\\n", "eprintid \n", "1 NaN \n", "10 NaN \n", "1000 NaN \n", "10001 [KLIMZUG projects, Helmholtz-Zentrum Geesthach... \n", "10008 Skidmore College \n", "\n", " organisation_home_page location_country \\\n", "eprintid \n", "1 NaN fr \n", "10 NaN se \n", "1000 NaN pt \n", "10001 [http://www.climateservicecenter.de/, http://w... de \n", "10008 http://www.skidmore.edu/ us \n", "\n", " location_city location_latitude location_longitude software \\\n", "eprintid \n", "1 NaN NaN NaN hal \n", "10 Uppsala 59.8667 17.6333 diva \n", "1000 Bellevue, WA 47.6034 -122.155 dspace \n", "10001 Hamburg 53.5511 9.9937 opus \n", "10008 Saratoga Springs 43.0961 -73.7818 bepress \n", "\n", " geoname version subjects date \\\n", "eprintid \n", "1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n", "10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n", "1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n", "10001 geoname_2_DE other [S1, GE, HD, GF, G1] 2015-07-02 08:08:31 \n", "10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n", "\n", " note suggestions activity_low activity_medium activity_high \\\n", "eprintid \n", "1 NaN NaN 0 0 0 \n", "10 NaN NaN 0 0 0 \n", "1000 NaN NaN NaN NaN NaN \n", "10001 NaN NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN NaN \n", "\n", " recordcount recordhistory \\\n", "eprintid \n", "1 25 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... \n", "10 100 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... \n", "1000 NaN NaN \n", "10001 NaN NaN \n", "10008 NaN NaN \n", "\n", " fulltexts_total fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n", "eprintid \n", "1 NaN NaN NaN NaN \n", "10 NaN NaN NaN NaN \n", "1000 NaN NaN NaN NaN \n", "10001 NaN NaN NaN NaN \n", "10008 NaN NaN NaN NaN \n", "\n", " registry_name registry_id submit_to submitted_to_name \\\n", "eprintid \n", "1 [celestial, opendoar] [58, 669] NaN NaN \n", "10 [celestial, opendoar] [258, 526] NaN NaN \n", "1000 NaN NaN NaN NaN \n", "10001 [celestial, opendoar] [3408, 5881] NaN NaN \n", "10008 celestial 5882 NaN NaN \n", "\n", " submitted_to_done webometrics_rank webometrics_size \\\n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN \n", "\n", " webometrics_visibility webometrics_rich_files webometrics_scholar \\\n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN \n", "\n", " monthly_deposits total_deposits association \n", "eprintid \n", "1 NaN NaN NaN \n", "10 NaN NaN NaN \n", "1000 NaN NaN NaN \n", "10001 NaN NaN NaN \n", "10008 NaN NaN NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def value_or_list(cell_set):\n", " copy = set(cell_set)\n", " copy.discard(np.nan) \n", " if len(copy) == 0:\n", " return np.nan\n", " if len(copy) == 1:\n", " return copy.pop()\n", " return list(copy)\n", " \n", "roar_df = roar_df.applymap(value_or_list)\n", "roar_df.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
4188606657archive1NaNNaNdisk0/00/00/06/062010-01-06 13:44:312016-04-17 21:53:142010-01-06 13:44:31subjectNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0NaNNaNNaNhttp://hal.archives-ouvertes.fr/HAL: Hyper Article en Lignehttp://hal.archives-ouvertes.fr/oai/oai.phpNaNNaNNaNNaNTRUETRUENaNNaNNaNfrNaNNaNNaNhalgeoname_2_FRotherNaN1998-11-02 11:53:57NaNNaN0026758164,12,17,26,43,57,81,185,431,861,1184,1517,2442...NaNNaNNaNNaN[celestial, roarmap, opendoar][69, 166, 1106]NaNNaNNaN161110,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...3063NaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "4188 606 657 archive 1 NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "4188 disk0/00/00/06/06 2010-01-06 13:44:31 2016-04-17 21:53:14 \n", "\n", " status_changed type succeeds commentary metadata_visibility \\\n", "4188 2010-01-06 13:44:31 subject NaN NaN show \n", "\n", " latitude longitude relation_type relation_uri item_issues_id \\\n", "4188 NaN NaN NaN NaN NaN \n", "\n", " item_issues_type item_issues_description item_issues_timestamp \\\n", "4188 NaN NaN NaN \n", "\n", " item_issues_status item_issues_reported_by item_issues_resolved_by \\\n", "4188 NaN NaN NaN \n", "\n", " item_issues_comment item_issues_count sword_depositor sword_slug \\\n", "4188 NaN 0 NaN NaN \n", "\n", " exemplar home_page title \\\n", "4188 NaN http://hal.archives-ouvertes.fr/ HAL: Hyper Article en Ligne \n", "\n", " oai_pmh sword_endpoint rss_feed \\\n", "4188 http://hal.archives-ouvertes.fr/oai/oai.php NaN NaN \n", "\n", " twitter_feed description fulltext open_access mandate organisation_title \\\n", "4188 NaN NaN TRUE TRUE NaN NaN \n", "\n", " organisation_home_page location_country location_city location_latitude \\\n", "4188 NaN fr NaN NaN \n", "\n", " location_longitude software geoname version subjects \\\n", "4188 NaN hal geoname_2_FR other NaN \n", "\n", " date note suggestions activity_low activity_medium \\\n", "4188 1998-11-02 11:53:57 NaN NaN 0 0 \n", "\n", " activity_high recordcount \\\n", "4188 2 675816 \n", "\n", " recordhistory fulltexts_total \\\n", "4188 4,12,17,26,43,57,81,185,431,861,1184,1517,2442... NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs \\\n", "4188 NaN NaN NaN \n", "\n", " registry_name registry_id submit_to \\\n", "4188 [celestial, roarmap, opendoar] [69, 166, 1106] NaN \n", "\n", " submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n", "4188 NaN NaN 1 6 \n", "\n", " webometrics_visibility webometrics_rich_files webometrics_scholar \\\n", "4188 1 1 1 \n", "\n", " monthly_deposits total_deposits \\\n", "4188 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... 3063 \n", "\n", " association \n", "4188 NaN " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df.reset_index(inplace=True)\n", "roar_df[roar_df.eprintid == '606']" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
count53755375537553750.00.0537553755375537553751070.053750.00.00.00.063636363630.00.00.022450.00.026553685373426717615211153782412741273676439642265080365536813664463746715375125053602151872291229122912293229127025827025846034578293205205148148148148148756756217
unique537565812135NaNNaN537541273966415812107NaN2NaNNaNNaNNaN4856243NaNNaNNaN4NaNNaN25202507639941701468111330422238023771143186128872917311265390648302071717254167411704135118134117742577111481481481461433463423
top125911archive1NaNNaNdisk0/00/00/14/072010-01-06 13:43:482016-05-02 05:43:042010-01-06 13:43:48institutional12637NaNshowNaNNaNNaNNaNbad_oai_pmh_url_0duplicate_titleDuplicate title to <xhtml:table xmlns:xhtml=\"h...2010-01-13 10:44:49discoveredNaNNaNNaN0NaNNaNFALSEhttp://eprints.upnjatim.ac.id/Repositorio Institucionalhttp://kce.docressources.info/ws/PMBWs_2http://npl.csircentral.net/http://eprints.upnjatim.ac.id/cgi/latest_tool?...http://twitter.com/bu_ufscinfo:other:archives.eprints.org:importTRUETRUEFALSEChinese Academy of Science (中国科学院)http://www.cas.cn/usLima34.1607-118.139dspacegeoname_2_USotherK12006-05-04 10:48:14DSpace@Işık is a growing collection of Işık Un...This repository is hosted by the Texas Digital...0001000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0000[celestial, opendoar][2246, 1879][celestial, roarmap, opendoar]opendoar2021-01-253677386688248060,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...0russell_group
freq133253751333NaNNaN11681637951NaN5334NaNNaNNaNNaN153324538NaNNaNNaN2204NaNNaN258474252112275826522699998866925252307840477153992920152077221373395113114113114211449220520511135387387127
meanNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
stdNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
minNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
25%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
50%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
75%NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
maxNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "count 5375 5375 5375 5375 0.0 0.0 \n", "unique 5375 658 1 2135 NaN NaN \n", "top 1259 11 archive 1 NaN NaN \n", "freq 1 332 5375 1333 NaN NaN \n", "mean NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "count 5375 5375 5375 \n", "unique 5375 4127 3966 \n", "top disk0/00/00/14/07 2010-01-06 13:43:48 2016-05-02 05:43:04 \n", "freq 1 16 8 \n", "mean NaN NaN NaN \n", "std NaN NaN NaN \n", "min NaN NaN NaN \n", "25% NaN NaN NaN \n", "50% NaN NaN NaN \n", "75% NaN NaN NaN \n", "max NaN NaN NaN \n", "\n", " status_changed type succeeds commentary \\\n", "count 5375 5375 107 0.0 \n", "unique 4158 12 107 NaN \n", "top 2010-01-06 13:43:48 institutional 12637 NaN \n", "freq 16 3795 1 NaN \n", "mean NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n", "min NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "count 5375 0.0 0.0 0.0 0.0 \n", "unique 2 NaN NaN NaN NaN \n", "top show NaN NaN NaN NaN \n", "freq 5334 NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN \n", "min NaN NaN NaN NaN NaN \n", "25% NaN NaN NaN NaN NaN \n", "50% NaN NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN NaN \n", "max NaN NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type \\\n", "count 63 63 \n", "unique 48 5 \n", "top bad_oai_pmh_url_0 duplicate_title \n", "freq 15 33 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " item_issues_description \\\n", "count 63 \n", "unique 62 \n", "top Duplicate title to