registries_analysis/notebooks/03-overlap.ipynb

2339 lines
96 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"# Show every column when a DataFrame is rendered (no column truncation).\n",
"pd.options.display.max_columns = None"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::29a83a8a9641bb860a679d7e5ba52d26</td>\n",
" <td>14174</td>\n",
" <td>14174</td>\n",
" <td>OHIO Open Library | Ohio University Research</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
" <td>4612</td>\n",
" <td>4612</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
" <td>4649</td>\n",
" <td>4649</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
" <td>re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f</td>\n",
" <td>r3d100011306</td>\n",
" <td>RESID Database of Protein Modifications</td>\n",
" <td>re3data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
" <td>RESID Database of Protein Modifications</td>\n",
" <td>FAIRsharing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>dedup::0023a1e3447fdb31836536cc903f1310</td>\n",
" <td>opendoar____::c6f798b844366ccd65d99bc7f31e0e02</td>\n",
" <td>3410</td>\n",
" <td>erucu: electronic repository of the ukrainian ...</td>\n",
" <td>OpenDOAR</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>dedup::0023a1e3447fdb31836536cc903f1310</td>\n",
" <td>10013</td>\n",
" <td>10013</td>\n",
" <td>ErUCU: Electronic repository of the Ukrainian ...</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>dedup::003ab6b40af9b488decea7c582d150a2</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
" <td>Synapse</td>\n",
" <td>FAIRsharing</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>dedup::003ab6b40af9b488decea7c582d150a2</td>\n",
" <td>re3data_____::cafc5d99b7c187e24b40d958a16a91f1</td>\n",
" <td>r3d100011894</td>\n",
" <td>Synapse</td>\n",
" <td>re3data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>dedup::0064f599ed0adb5870a5b3ffe438e485</td>\n",
" <td>16034</td>\n",
" <td>16034</td>\n",
" <td>Giresun University Institutional Repository</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::29a83a8a9641bb860a679d7e5ba52d26 \n",
"1 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
"2 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
"3 dedup::001e6d882e54c780ce269d3c46997287 \n",
"4 dedup::001e6d882e54c780ce269d3c46997287 \n",
"5 dedup::0023a1e3447fdb31836536cc903f1310 \n",
"6 dedup::0023a1e3447fdb31836536cc903f1310 \n",
"7 dedup::003ab6b40af9b488decea7c582d150a2 \n",
"8 dedup::003ab6b40af9b488decea7c582d150a2 \n",
"9 dedup::0064f599ed0adb5870a5b3ffe438e485 \n",
"\n",
" duplicate_id \\\n",
"0 14174 \n",
"1 4612 \n",
"2 4649 \n",
"3 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
"5 opendoar____::c6f798b844366ccd65d99bc7f31e0e02 \n",
"6 10013 \n",
"7 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
"8 re3data_____::cafc5d99b7c187e24b40d958a16a91f1 \n",
"9 16034 \n",
"\n",
" original_id \\\n",
"0 14174 \n",
"1 4612 \n",
"2 4649 \n",
"3 r3d100011306 \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
"5 3410 \n",
"6 10013 \n",
"7 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
"8 r3d100011894 \n",
"9 16034 \n",
"\n",
" name source \n",
"0 OHIO Open Library | Ohio University Research roar \n",
"1 IIT Bombay Institutional Repository roar \n",
"2 IIT Bombay Institutional Repository roar \n",
"3 RESID Database of Protein Modifications re3data \n",
"4 RESID Database of Protein Modifications FAIRsharing \n",
"5 erucu: electronic repository of the ukrainian ... OpenDOAR \n",
"6 ErUCU: Electronic repository of the Ukrainian ... roar \n",
"7 Synapse FAIRsharing \n",
"8 Synapse re3data \n",
"9 Giresun University Institutional Repository roar "
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the deduplication result (semicolon-separated).\n",
"# The id columns mix purely numeric ids with prefixed string ids, so every\n",
"# column is read as text instead of relying on type inference; later cells\n",
"# compare ids as strings.\n",
"df_09 = pd.read_csv('../data/interim/ds_dedup09.csv', sep=';', dtype='str')\n",
"df_09.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5115</th>\n",
" <td>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</td>\n",
" <td>1126</td>\n",
" <td>1126</td>\n",
" <td>RIT Digital Media Library</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5116</th>\n",
" <td>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</td>\n",
" <td>opendoar____::443cb001c138b2561a0d90720d6ce111</td>\n",
" <td>648</td>\n",
" <td>rit digital media library</td>\n",
" <td>OpenDOAR</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"5115 dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 \n",
"5116 dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 \n",
"\n",
" duplicate_id original_id \\\n",
"5115 1126 1126 \n",
"5116 opendoar____::443cb001c138b2561a0d90720d6ce111 648 \n",
"\n",
" name source \n",
"5115 RIT Digital Media Library roar \n",
"5116 rit digital media library OpenDOAR "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show all records assigned to this dedup_id.\n",
"df_09.loc[df_09['dedup_id'] == 'dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98']"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>dedup::01846ae470651e97d2f73fce979406a9</td>\n",
" <td>opendoar____::b4d6f2b565ca0eef1f9245403aac366a</td>\n",
" <td>7668</td>\n",
" <td>digital commons at michigan state university c...</td>\n",
" <td>OpenDOAR</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"40 dedup::01846ae470651e97d2f73fce979406a9 \n",
"\n",
" duplicate_id original_id \\\n",
"40 opendoar____::b4d6f2b565ca0eef1f9245403aac366a 7668 \n",
"\n",
" name source \n",
"40 digital commons at michigan state university c... OpenDOAR "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show all records assigned to this dedup_id.\n",
"df_09.loc[df_09['dedup_id'] == 'dedup::01846ae470651e97d2f73fce979406a9']"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the re3data dump (tab-separated). The listed columns are parsed with\n",
"# ast.literal_eval while reading; all other columns use pandas' defaults.\n",
"re3data_df = pd.read_csv(\n",
"    '../data/raw/re3data.tsv',\n",
"    sep='\\t',\n",
"    converters=dict.fromkeys(\n",
"        ['subject', 'keyword', 'additional_name', 'repository_id',\n",
"         'type', 'content_type', 'provider_type', 'institution'],\n",
"        ast.literal_eval,\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Load the OpenDOAR dump (tab-separated). The listed columns are parsed with\n",
"# ast.literal_eval while reading; all other columns use pandas' defaults.\n",
"opendoar_df = pd.read_csv(\n",
"    '../data/raw/openDoar.tsv',\n",
"    sep='\\t',\n",
"    converters=dict.fromkeys(\n",
"        ['subject', 'additional_name', 'opendoar_id',\n",
"         'content_type', 'institution'],\n",
"        ast.literal_eval,\n",
"    ),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Load the ROAR CSV export, reading every column as text.\n",
"roar_df = pd.read_csv(\n",
"    '../data/raw/export_roar_CSV.csv',\n",
"    dtype='str',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Load the FAIRsharing summary (pipe-separated). header=0 consumes the file's\n",
"# own header row so that the column names given below replace it.\n",
"fairsharing_df = pd.read_csv(\n",
"    '../data/raw/FAIRsharingDBrec_summary20210304.csv',\n",
"    sep='|',\n",
"    header=0,\n",
"    names=['full_name', 'short_name', 'fs_url', 'url', 'countries', 'subjects'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::29a83a8a9641bb860a679d7e5ba52d26</td>\n",
" <td>14174</td>\n",
" <td>14174</td>\n",
" <td>OHIO Open Library | Ohio University Research</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
" <td>4612</td>\n",
" <td>4612</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
" <td>4649</td>\n",
" <td>4649</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
" <td>re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f</td>\n",
" <td>r3d100011306</td>\n",
" <td>RESID Database of Protein Modifications</td>\n",
" <td>re3data</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
" <td>RESID Database of Protein Modifications</td>\n",
" <td>FAIRsharing</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::29a83a8a9641bb860a679d7e5ba52d26 \n",
"1 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
"2 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
"3 dedup::001e6d882e54c780ce269d3c46997287 \n",
"4 dedup::001e6d882e54c780ce269d3c46997287 \n",
"\n",
" duplicate_id \\\n",
"0 14174 \n",
"1 4612 \n",
"2 4649 \n",
"3 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
"\n",
" original_id \\\n",
"0 14174 \n",
"1 4612 \n",
"2 4649 \n",
"3 r3d100011306 \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
"\n",
" name source \n",
"0 OHIO Open Library | Ohio University Research roar \n",
"1 IIT Bombay Institutional Repository roar \n",
"2 IIT Bombay Institutional Repository roar \n",
"3 RESID Database of Protein Modifications re3data \n",
"4 RESID Database of Protein Modifications FAIRsharing "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the dedup table.\n",
"df_09.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>dedup::000871c1fc726f0b52dc86a4eeb027de</th>\n",
" <td>[4612, 4649]</td>\n",
" <td>[4612, 4649]</td>\n",
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
" <td>[roar, roar]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::001e6d882e54c780ce269d3c46997287</th>\n",
" <td>[re3data_____::4af9fe2bb93511a5e0f0c39e94d6557...</td>\n",
" <td>[r3d100011306, https://fairsharing.org/10.2550...</td>\n",
" <td>[RESID Database of Protein Modifications, RESI...</td>\n",
" <td>[re3data, FAIRsharing]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::0023a1e3447fdb31836536cc903f1310</th>\n",
" <td>[opendoar____::c6f798b844366ccd65d99bc7f31e0e0...</td>\n",
" <td>[3410, 10013]</td>\n",
" <td>[erucu: electronic repository of the ukrainian...</td>\n",
" <td>[OpenDOAR, roar]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::003ab6b40af9b488decea7c582d150a2</th>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[Synapse, Synapse]</td>\n",
" <td>[FAIRsharing, re3data]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::0064f599ed0adb5870a5b3ffe438e485</th>\n",
" <td>[16034, opendoar____::d1f157379ea7e51d4a8c07af...</td>\n",
" <td>[16034, 9647]</td>\n",
" <td>[Giresun University Institutional Repository, ...</td>\n",
" <td>[roar, OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::ff49cc40a8890e6a60f40ff3026d2730</th>\n",
" <td>[1333, opendoar____::2bd7f907b7f5b6bbd91822c0c...</td>\n",
" <td>[1333, 1389]</td>\n",
" <td>[UnissResearch, unissresearch]</td>\n",
" <td>[roar, OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::ff4d70de478038c72282b7e4af1d4260</th>\n",
" <td>[opendoar____::95a0810a93a87065bf7b28490817e9e...</td>\n",
" <td>[9752, 16367]</td>\n",
" <td>[european xfel publication database, European ...</td>\n",
" <td>[OpenDOAR, roar]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::ff826ce6ee85809389f18a5fafe72366</th>\n",
" <td>[opendoar____::62e7f2e090fe150ef8deb4466fdc81b...</td>\n",
" <td>[3601, 2608]</td>\n",
" <td>[electronic odessa national economic universit...</td>\n",
" <td>[OpenDOAR, OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::ffbd6cbb019a1413183c8d08f2929307</th>\n",
" <td>[3108, opendoar____::ff7d0f525b3be596a51fb9194...</td>\n",
" <td>[3108, 1912]</td>\n",
" <td>[Fotografía Sobre España en el Siglo XIX, foto...</td>\n",
" <td>[roar, OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</th>\n",
" <td>[1126, opendoar____::443cb001c138b2561a0d90720...</td>\n",
" <td>[1126, 648]</td>\n",
" <td>[RIT Digital Media Library, rit digital media ...</td>\n",
" <td>[roar, OpenDOAR]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2453 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" duplicate_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"dedup::001e6d882e54c780ce269d3c46997287 [re3data_____::4af9fe2bb93511a5e0f0c39e94d6557... \n",
"dedup::0023a1e3447fdb31836536cc903f1310 [opendoar____::c6f798b844366ccd65d99bc7f31e0e0... \n",
"dedup::003ab6b40af9b488decea7c582d150a2 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"dedup::0064f599ed0adb5870a5b3ffe438e485 [16034, opendoar____::d1f157379ea7e51d4a8c07af... \n",
"... ... \n",
"dedup::ff49cc40a8890e6a60f40ff3026d2730 [1333, opendoar____::2bd7f907b7f5b6bbd91822c0c... \n",
"dedup::ff4d70de478038c72282b7e4af1d4260 [opendoar____::95a0810a93a87065bf7b28490817e9e... \n",
"dedup::ff826ce6ee85809389f18a5fafe72366 [opendoar____::62e7f2e090fe150ef8deb4466fdc81b... \n",
"dedup::ffbd6cbb019a1413183c8d08f2929307 [3108, opendoar____::ff7d0f525b3be596a51fb9194... \n",
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [1126, opendoar____::443cb001c138b2561a0d90720... \n",
"\n",
" original_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"dedup::001e6d882e54c780ce269d3c46997287 [r3d100011306, https://fairsharing.org/10.2550... \n",
"dedup::0023a1e3447fdb31836536cc903f1310 [3410, 10013] \n",
"dedup::003ab6b40af9b488decea7c582d150a2 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"dedup::0064f599ed0adb5870a5b3ffe438e485 [16034, 9647] \n",
"... ... \n",
"dedup::ff49cc40a8890e6a60f40ff3026d2730 [1333, 1389] \n",
"dedup::ff4d70de478038c72282b7e4af1d4260 [9752, 16367] \n",
"dedup::ff826ce6ee85809389f18a5fafe72366 [3601, 2608] \n",
"dedup::ffbd6cbb019a1413183c8d08f2929307 [3108, 1912] \n",
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [1126, 648] \n",
"\n",
" name \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [IIT Bombay Institutional Repository, IIT Bomb... \n",
"dedup::001e6d882e54c780ce269d3c46997287 [RESID Database of Protein Modifications, RESI... \n",
"dedup::0023a1e3447fdb31836536cc903f1310 [erucu: electronic repository of the ukrainian... \n",
"dedup::003ab6b40af9b488decea7c582d150a2 [Synapse, Synapse] \n",
"dedup::0064f599ed0adb5870a5b3ffe438e485 [Giresun University Institutional Repository, ... \n",
"... ... \n",
"dedup::ff49cc40a8890e6a60f40ff3026d2730 [UnissResearch, unissresearch] \n",
"dedup::ff4d70de478038c72282b7e4af1d4260 [european xfel publication database, European ... \n",
"dedup::ff826ce6ee85809389f18a5fafe72366 [electronic odessa national economic universit... \n",
"dedup::ffbd6cbb019a1413183c8d08f2929307 [Fotografía Sobre España en el Siglo XIX, foto... \n",
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [RIT Digital Media Library, rit digital media ... \n",
"\n",
" source \n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n",
"dedup::001e6d882e54c780ce269d3c46997287 [re3data, FAIRsharing] \n",
"dedup::0023a1e3447fdb31836536cc903f1310 [OpenDOAR, roar] \n",
"dedup::003ab6b40af9b488decea7c582d150a2 [FAIRsharing, re3data] \n",
"dedup::0064f599ed0adb5870a5b3ffe438e485 [roar, OpenDOAR] \n",
"... ... \n",
"dedup::ff49cc40a8890e6a60f40ff3026d2730 [roar, OpenDOAR] \n",
"dedup::ff4d70de478038c72282b7e4af1d4260 [OpenDOAR, roar] \n",
"dedup::ff826ce6ee85809389f18a5fafe72366 [OpenDOAR, OpenDOAR] \n",
"dedup::ffbd6cbb019a1413183c8d08f2929307 [roar, OpenDOAR] \n",
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [roar, OpenDOAR] \n",
"\n",
"[2453 rows x 4 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per dedup_id; each remaining column holds the list of values\n",
"# contributed by the records that share that dedup_id.\n",
"dup = df_09.groupby('dedup_id').agg(list)\n",
"dup"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>dedup::01846ae470651e97d2f73fce979406a9</th>\n",
" <td>[opendoar____::b4d6f2b565ca0eef1f9245403aac366a]</td>\n",
" <td>[7668]</td>\n",
" <td>[digital commons at michigan state university ...</td>\n",
" <td>[OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::022036087426786cfd0f7f41fa7a2665</th>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[World Data Center for Climate at DRKZ]</td>\n",
" <td>[FAIRsharing]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::07e8b472e1e4af17a6b20ce083baf29f</th>\n",
" <td>[15036]</td>\n",
" <td>[15036]</td>\n",
" <td>[MiCISAN]</td>\n",
" <td>[roar]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::0894634a3244e3050d8057a453e17e57</th>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[European Variation Archive]</td>\n",
" <td>[FAIRsharing]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::0a54b19a13b6712dc04d1b49215423d8</th>\n",
" <td>[opendoar____::d34ab169b70c9dcd35e62896010cd9ff]</td>\n",
" <td>[377]</td>\n",
" <td>[yale medicine thesis digital library]</td>\n",
" <td>[OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::f8306c8f16096b6d944799f4d427a976</th>\n",
" <td>[re3data_____::574b553c6c374d597d2068ab2b117889]</td>\n",
" <td>[r3d100012041]</td>\n",
" <td>[Canadian Disaster Database]</td>\n",
" <td>[re3data]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::f9d8e2daaa9144310b66bf948e50d656</th>\n",
" <td>[re3data_____::95014789f83d7611ebfddace19d0523a]</td>\n",
" <td>[r3d100011045]</td>\n",
" <td>[Index to Marine &amp; Lacustrine Geological Samples]</td>\n",
" <td>[re3data]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::fcdbc4f504a15df8f78da88ee72fad32</th>\n",
" <td>[opendoar____::9f96f36b7aae3b1ff847c26ac94c604e]</td>\n",
" <td>[4979]</td>\n",
" <td>[university of minnesota law school]</td>\n",
" <td>[OpenDOAR]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::fcfe9c770eb9372e6961a17f7eaffd5f</th>\n",
" <td>[4637]</td>\n",
" <td>[4637]</td>\n",
" <td>[Simon Fraser University Institutional Reposit...</td>\n",
" <td>[roar]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::fe73f687e5bc5280214e0486b273a5f9</th>\n",
" <td>[330]</td>\n",
" <td>[330]</td>\n",
" <td>[DigitalCommons@Fort Lewis College: Scholarshi...</td>\n",
" <td>[roar]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>109 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" duplicate_id \\\n",
"dedup_id \n",
"dedup::01846ae470651e97d2f73fce979406a9 [opendoar____::b4d6f2b565ca0eef1f9245403aac366a] \n",
"dedup::022036087426786cfd0f7f41fa7a2665 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"dedup::07e8b472e1e4af17a6b20ce083baf29f [15036] \n",
"dedup::0894634a3244e3050d8057a453e17e57 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"dedup::0a54b19a13b6712dc04d1b49215423d8 [opendoar____::d34ab169b70c9dcd35e62896010cd9ff] \n",
"... ... \n",
"dedup::f8306c8f16096b6d944799f4d427a976 [re3data_____::574b553c6c374d597d2068ab2b117889] \n",
"dedup::f9d8e2daaa9144310b66bf948e50d656 [re3data_____::95014789f83d7611ebfddace19d0523a] \n",
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [opendoar____::9f96f36b7aae3b1ff847c26ac94c604e] \n",
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [4637] \n",
"dedup::fe73f687e5bc5280214e0486b273a5f9 [330] \n",
"\n",
" original_id \\\n",
"dedup_id \n",
"dedup::01846ae470651e97d2f73fce979406a9 [7668] \n",
"dedup::022036087426786cfd0f7f41fa7a2665 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"dedup::07e8b472e1e4af17a6b20ce083baf29f [15036] \n",
"dedup::0894634a3244e3050d8057a453e17e57 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"dedup::0a54b19a13b6712dc04d1b49215423d8 [377] \n",
"... ... \n",
"dedup::f8306c8f16096b6d944799f4d427a976 [r3d100012041] \n",
"dedup::f9d8e2daaa9144310b66bf948e50d656 [r3d100011045] \n",
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [4979] \n",
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [4637] \n",
"dedup::fe73f687e5bc5280214e0486b273a5f9 [330] \n",
"\n",
" name \\\n",
"dedup_id \n",
"dedup::01846ae470651e97d2f73fce979406a9 [digital commons at michigan state university ... \n",
"dedup::022036087426786cfd0f7f41fa7a2665 [World Data Center for Climate at DRKZ] \n",
"dedup::07e8b472e1e4af17a6b20ce083baf29f [MiCISAN] \n",
"dedup::0894634a3244e3050d8057a453e17e57 [European Variation Archive] \n",
"dedup::0a54b19a13b6712dc04d1b49215423d8 [yale medicine thesis digital library] \n",
"... ... \n",
"dedup::f8306c8f16096b6d944799f4d427a976 [Canadian Disaster Database] \n",
"dedup::f9d8e2daaa9144310b66bf948e50d656 [Index to Marine & Lacustrine Geological Samples] \n",
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [university of minnesota law school] \n",
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [Simon Fraser University Institutional Reposit... \n",
"dedup::fe73f687e5bc5280214e0486b273a5f9 [DigitalCommons@Fort Lewis College: Scholarshi... \n",
"\n",
" source \n",
"dedup_id \n",
"dedup::01846ae470651e97d2f73fce979406a9 [OpenDOAR] \n",
"dedup::022036087426786cfd0f7f41fa7a2665 [FAIRsharing] \n",
"dedup::07e8b472e1e4af17a6b20ce083baf29f [roar] \n",
"dedup::0894634a3244e3050d8057a453e17e57 [FAIRsharing] \n",
"dedup::0a54b19a13b6712dc04d1b49215423d8 [OpenDOAR] \n",
"... ... \n",
"dedup::f8306c8f16096b6d944799f4d427a976 [re3data] \n",
"dedup::f9d8e2daaa9144310b66bf948e50d656 [re3data] \n",
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [OpenDOAR] \n",
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [roar] \n",
"dedup::fe73f687e5bc5280214e0486b273a5f9 [roar] \n",
"\n",
"[109 rows x 4 columns]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# dedup_ids whose source list has exactly one entry.\n",
"dup.loc[dup['source'].str.len() == 1]"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([list(['r3d100013204', 'r3d100013458', 'r3d100012807', 'r3d100012808', 'r3d100012806', 'r3d100012805']),\n",
" list(['243', '5702', '5715', '5694', '5689', '5658', '5710', '5750', '5721', '5704']),\n",
" list(['2738', '4991', '2727', '2729', '2724', '2728', '2740', '174']),\n",
" list(['19', '8', '7', '11', '10', '13', '6', '12', '20', '15', '9', '5', '14', '16'])],\n",
" dtype=object)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# original_id lists of the dedup_ids whose source list has six or more entries.\n",
"dup.loc[dup['source'].str.len() >= 6, 'original_id'].values"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[Harvard University, Institute for Quantitativ...</td>\n",
" <td>[Ontario Council of University Libraries, [CBU...</td>\n",
" <td>[Scholars Portal Dataverse, [], CAN, [general]...</td>\n",
" <td>[The Dataverse Project, [], AAA, [technical], ...</td>\n",
" <td>[University of Ottawa, Library, [Université d'...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[Dataverse Project, [], AAA, [technical], non-...</td>\n",
" <td>[Harvard University, Institute for Quantitave ...</td>\n",
" <td>[Ontario Council of University Libraries, [CBU...</td>\n",
" <td>[University of Toronto, Libraries, [], CAN, [g...</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>[Harvard University, Institute for Quantitativ...</td>\n",
" <td>[Ontario Council of University Libraries, [CBU...</td>\n",
" <td>[Scholars Portal Dataverse, [dataverse@scholar...</td>\n",
" <td>[The Dataverse Project, [], AAA, [technical], ...</td>\n",
" <td>[University of Windsor, [], CAN, [general], no...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>[Harvard University, Institute for Quantitativ...</td>\n",
" <td>[Ontario Council of University Libraries, [CBU...</td>\n",
" <td>[Scholars Portal Dataverse, [], CAN, [general]...</td>\n",
" <td>[The Dataverse Project, [], AAA, [technical], ...</td>\n",
" <td>[University of Waterloo, [], CAN, [general], n...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[University of Victoria, [UVic], CAN, [general...</td>\n",
" <td>[University of Victoria, Libraries, [], CAN, [...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>[The University of British Columbia, [], CAN, ...</td>\n",
" <td>[University of British Columbia, Library, [], ...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 \\\n",
"0 [Harvard University, Institute for Quantitativ... \n",
"1 [Dataverse Project, [], AAA, [technical], non-... \n",
"2 [Harvard University, Institute for Quantitativ... \n",
"3 [Harvard University, Institute for Quantitativ... \n",
"4 [University of Victoria, [UVic], CAN, [general... \n",
"5 [The University of British Columbia, [], CAN, ... \n",
"\n",
" 1 \\\n",
"0 [Ontario Council of University Libraries, [CBU... \n",
"1 [Harvard University, Institute for Quantitave ... \n",
"2 [Ontario Council of University Libraries, [CBU... \n",
"3 [Ontario Council of University Libraries, [CBU... \n",
"4 [University of Victoria, Libraries, [], CAN, [... \n",
"5 [University of British Columbia, Library, [], ... \n",
"\n",
" 2 \\\n",
"0 [Scholars Portal Dataverse, [], CAN, [general]... \n",
"1 [Ontario Council of University Libraries, [CBU... \n",
"2 [Scholars Portal Dataverse, [dataverse@scholar... \n",
"3 [Scholars Portal Dataverse, [], CAN, [general]... \n",
"4 None \n",
"5 None \n",
"\n",
" 3 \\\n",
"0 [The Dataverse Project, [], AAA, [technical], ... \n",
"1 [University of Toronto, Libraries, [], CAN, [g... \n",
"2 [The Dataverse Project, [], AAA, [technical], ... \n",
"3 [The Dataverse Project, [], AAA, [technical], ... \n",
"4 None \n",
"5 None \n",
"\n",
" 4 \n",
"0 [University of Ottawa, Library, [Université d'... \n",
"1 None \n",
"2 [University of Windsor, [], CAN, [general], no... \n",
"3 [University of Waterloo, [], CAN, [general], n... \n",
"4 None \n",
"5 None "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Institution entries of the re3data records with the ids below, laid out\n",
"# one row per record and one column per institution entry.\n",
"pd.DataFrame(\n",
"    re3data_df.loc[\n",
"        re3data_df['re3data_id'].isin([\n",
"            'r3d100013204', 'r3d100013458', 'r3d100012807',\n",
"            'r3d100012808', 'r3d100012806', 'r3d100012805',\n",
"        ]),\n",
"        'institution',\n",
"    ].to_list()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>eprintid</th>\n",
" <th>rev_number</th>\n",
" <th>eprint_status</th>\n",
" <th>userid</th>\n",
" <th>importid</th>\n",
" <th>source</th>\n",
" <th>dir</th>\n",
" <th>datestamp</th>\n",
" <th>lastmod</th>\n",
" <th>status_changed</th>\n",
" <th>type</th>\n",
" <th>succeeds</th>\n",
" <th>commentary</th>\n",
" <th>metadata_visibility</th>\n",
" <th>latitude</th>\n",
" <th>longitude</th>\n",
" <th>relation_type</th>\n",
" <th>relation_uri</th>\n",
" <th>item_issues_id</th>\n",
" <th>item_issues_type</th>\n",
" <th>item_issues_description</th>\n",
" <th>item_issues_timestamp</th>\n",
" <th>item_issues_status</th>\n",
" <th>item_issues_reported_by</th>\n",
" <th>item_issues_resolved_by</th>\n",
" <th>item_issues_comment</th>\n",
" <th>item_issues_count</th>\n",
" <th>sword_depositor</th>\n",
" <th>sword_slug</th>\n",
" <th>exemplar</th>\n",
" <th>home_page</th>\n",
" <th>title</th>\n",
" <th>oai_pmh</th>\n",
" <th>sword_endpoint</th>\n",
" <th>rss_feed</th>\n",
" <th>twitter_feed</th>\n",
" <th>description</th>\n",
" <th>fulltext</th>\n",
" <th>open_access</th>\n",
" <th>mandate</th>\n",
" <th>organisation_title</th>\n",
" <th>organisation_home_page</th>\n",
" <th>location_country</th>\n",
" <th>location_city</th>\n",
" <th>location_latitude</th>\n",
" <th>location_longitude</th>\n",
" <th>software</th>\n",
" <th>geoname</th>\n",
" <th>version</th>\n",
" <th>subjects</th>\n",
" <th>date</th>\n",
" <th>note</th>\n",
" <th>suggestions</th>\n",
" <th>activity_low</th>\n",
" <th>activity_medium</th>\n",
" <th>activity_high</th>\n",
" <th>recordcount</th>\n",
" <th>recordhistory</th>\n",
" <th>fulltexts_total</th>\n",
" <th>fulltexts_docs</th>\n",
" <th>fulltexts_rtotal</th>\n",
" <th>fulltexts_rdocs</th>\n",
" <th>registry_name</th>\n",
" <th>registry_id</th>\n",
" <th>submit_to</th>\n",
" <th>submitted_to_name</th>\n",
" <th>submitted_to_done</th>\n",
" <th>webometrics_rank</th>\n",
" <th>webometrics_size</th>\n",
" <th>webometrics_visibility</th>\n",
" <th>webometrics_rich_files</th>\n",
" <th>webometrics_scholar</th>\n",
" <th>monthly_deposits</th>\n",
" <th>total_deposits</th>\n",
" <th>association</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>4167</th>\n",
" <td>4612</td>\n",
" <td>28</td>\n",
" <td>archive</td>\n",
" <td>1380</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/46/12</td>\n",
" <td>2012-01-08 03:17:02</td>\n",
" <td>2012-04-16 10:53:04</td>\n",
" <td>2012-01-08 03:17:02</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://dspace.library.iitb.ac.in/jspui/</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>http://dspace.library.iitb.ac.in/oai/request</td>\n",
" <td>NaN</td>\n",
" <td>http://dspace.library.iitb.ac.in/xmlui/feed/at...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>IIT Bombay</td>\n",
" <td>http://www.iitb.ac.in</td>\n",
" <td>in</td>\n",
" <td>Mumbai</td>\n",
" <td>19.133</td>\n",
" <td>72.9166</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_IN</td>\n",
" <td>other</td>\n",
" <td>TA</td>\n",
" <td>2011-12-15 09:01:35</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>99</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,9...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>4790</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4168</th>\n",
" <td>4612</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TD</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4169</th>\n",
" <td>4612</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TH</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4170</th>\n",
" <td>4612</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TJ</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4171</th>\n",
" <td>4612</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TK</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4172</th>\n",
" <td>4612</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4173</th>\n",
" <td>4612</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TP</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16436</th>\n",
" <td>4649</td>\n",
" <td>8</td>\n",
" <td>archive</td>\n",
" <td>1380</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/46/49</td>\n",
" <td>2012-02-05 13:57:01</td>\n",
" <td>2012-04-16 10:39:58</td>\n",
" <td>2012-02-05 13:57:01</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://dspace.library.iitb.ac.in/jspui/</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>http://dspace.library.iitb.ac.in/oai</td>\n",
" <td>NaN</td>\n",
" <td>http://dspace.library.iitb.ac.in/xmlui/feed/rs...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>IIT Bombay</td>\n",
" <td>http://www.iitb.ac.in</td>\n",
" <td>in</td>\n",
" <td>Mumbai</td>\n",
" <td>19.133</td>\n",
" <td>72.9166</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_IN</td>\n",
" <td>other</td>\n",
" <td>T1</td>\n",
" <td>2012-01-05 12:09:37</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>4789</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16437</th>\n",
" <td>4649</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TA</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" eprintid rev_number eprint_status userid importid source \\\n",
"4167 4612 28 archive 1380 NaN NaN \n",
"4168 4612 NaN NaN NaN NaN NaN \n",
"4169 4612 NaN NaN NaN NaN NaN \n",
"4170 4612 NaN NaN NaN NaN NaN \n",
"4171 4612 NaN NaN NaN NaN NaN \n",
"4172 4612 NaN NaN NaN NaN NaN \n",
"4173 4612 NaN NaN NaN NaN NaN \n",
"16436 4649 8 archive 1380 NaN NaN \n",
"16437 4649 NaN NaN NaN NaN NaN \n",
"\n",
" dir datestamp lastmod \\\n",
"4167 disk0/00/00/46/12 2012-01-08 03:17:02 2012-04-16 10:53:04 \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 disk0/00/00/46/49 2012-02-05 13:57:01 2012-04-16 10:39:58 \n",
"16437 NaN NaN NaN \n",
"\n",
" status_changed type succeeds commentary \\\n",
"4167 2012-01-08 03:17:02 institutional NaN NaN \n",
"4168 NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN \n",
"16436 2012-02-05 13:57:01 institutional NaN NaN \n",
"16437 NaN NaN NaN NaN \n",
"\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n",
"4167 show NaN NaN NaN NaN \n",
"4168 NaN NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN NaN \n",
"16436 show NaN NaN NaN NaN \n",
"16437 NaN NaN NaN NaN NaN \n",
"\n",
" item_issues_id item_issues_type item_issues_description \\\n",
"4167 NaN NaN NaN \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 NaN NaN NaN \n",
"16437 NaN NaN NaN \n",
"\n",
" item_issues_timestamp item_issues_status item_issues_reported_by \\\n",
"4167 NaN NaN NaN \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 NaN NaN NaN \n",
"16437 NaN NaN NaN \n",
"\n",
" item_issues_resolved_by item_issues_comment item_issues_count \\\n",
"4167 NaN NaN NaN \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 NaN NaN NaN \n",
"16437 NaN NaN NaN \n",
"\n",
" sword_depositor sword_slug exemplar \\\n",
"4167 NaN NaN NaN \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 NaN NaN NaN \n",
"16437 NaN NaN NaN \n",
"\n",
" home_page \\\n",
"4167 http://dspace.library.iitb.ac.in/jspui/ \n",
"4168 NaN \n",
"4169 NaN \n",
"4170 NaN \n",
"4171 NaN \n",
"4172 NaN \n",
"4173 NaN \n",
"16436 http://dspace.library.iitb.ac.in/jspui/ \n",
"16437 NaN \n",
"\n",
" title \\\n",
"4167 IIT Bombay Institutional Repository \n",
"4168 NaN \n",
"4169 NaN \n",
"4170 NaN \n",
"4171 NaN \n",
"4172 NaN \n",
"4173 NaN \n",
"16436 IIT Bombay Institutional Repository \n",
"16437 NaN \n",
"\n",
" oai_pmh sword_endpoint \\\n",
"4167 http://dspace.library.iitb.ac.in/oai/request NaN \n",
"4168 NaN NaN \n",
"4169 NaN NaN \n",
"4170 NaN NaN \n",
"4171 NaN NaN \n",
"4172 NaN NaN \n",
"4173 NaN NaN \n",
"16436 http://dspace.library.iitb.ac.in/oai NaN \n",
"16437 NaN NaN \n",
"\n",
" rss_feed twitter_feed \\\n",
"4167 http://dspace.library.iitb.ac.in/xmlui/feed/at... NaN \n",
"4168 NaN NaN \n",
"4169 NaN NaN \n",
"4170 NaN NaN \n",
"4171 NaN NaN \n",
"4172 NaN NaN \n",
"4173 NaN NaN \n",
"16436 http://dspace.library.iitb.ac.in/xmlui/feed/rs... NaN \n",
"16437 NaN NaN \n",
"\n",
" description fulltext open_access mandate organisation_title \\\n",
"4167 NaN TRUE TRUE TRUE IIT Bombay \n",
"4168 NaN NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN NaN \n",
"16436 NaN TRUE TRUE FALSE IIT Bombay \n",
"16437 NaN NaN NaN NaN NaN \n",
"\n",
" organisation_home_page location_country location_city location_latitude \\\n",
"4167 http://www.iitb.ac.in in Mumbai 19.133 \n",
"4168 NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN \n",
"16436 http://www.iitb.ac.in in Mumbai 19.133 \n",
"16437 NaN NaN NaN NaN \n",
"\n",
" location_longitude software geoname version subjects \\\n",
"4167 72.9166 dspace geoname_2_IN other TA \n",
"4168 NaN NaN NaN NaN TD \n",
"4169 NaN NaN NaN NaN TH \n",
"4170 NaN NaN NaN NaN TJ \n",
"4171 NaN NaN NaN NaN TK \n",
"4172 NaN NaN NaN NaN TN \n",
"4173 NaN NaN NaN NaN TP \n",
"16436 72.9166 dspace geoname_2_IN other T1 \n",
"16437 NaN NaN NaN NaN TA \n",
"\n",
" date note suggestions activity_low activity_medium \\\n",
"4167 2011-12-15 09:01:35 NaN NaN 0 0 \n",
"4168 NaN NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN NaN \n",
"16436 2012-01-05 12:09:37 NaN NaN NaN NaN \n",
"16437 NaN NaN NaN NaN NaN \n",
"\n",
" activity_high recordcount \\\n",
"4167 0 99 \n",
"4168 NaN NaN \n",
"4169 NaN NaN \n",
"4170 NaN NaN \n",
"4171 NaN NaN \n",
"4172 NaN NaN \n",
"4173 NaN NaN \n",
"16436 NaN NaN \n",
"16437 NaN NaN \n",
"\n",
" recordhistory fulltexts_total \\\n",
"4167 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,9... NaN \n",
"4168 NaN NaN \n",
"4169 NaN NaN \n",
"4170 NaN NaN \n",
"4171 NaN NaN \n",
"4172 NaN NaN \n",
"4173 NaN NaN \n",
"16436 NaN NaN \n",
"16437 NaN NaN \n",
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
"4167 NaN NaN NaN celestial \n",
"4168 NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN \n",
"16436 NaN NaN NaN celestial \n",
"16437 NaN NaN NaN NaN \n",
"\n",
" registry_id submit_to submitted_to_name submitted_to_done \\\n",
"4167 4790 NaN NaN NaN \n",
"4168 NaN NaN NaN NaN \n",
"4169 NaN NaN NaN NaN \n",
"4170 NaN NaN NaN NaN \n",
"4171 NaN NaN NaN NaN \n",
"4172 NaN NaN NaN NaN \n",
"4173 NaN NaN NaN NaN \n",
"16436 4789 NaN NaN NaN \n",
"16437 NaN NaN NaN NaN \n",
"\n",
" webometrics_rank webometrics_size webometrics_visibility \\\n",
"4167 NaN NaN NaN \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 NaN NaN NaN \n",
"16437 NaN NaN NaN \n",
"\n",
" webometrics_rich_files webometrics_scholar monthly_deposits \\\n",
"4167 NaN NaN NaN \n",
"4168 NaN NaN NaN \n",
"4169 NaN NaN NaN \n",
"4170 NaN NaN NaN \n",
"4171 NaN NaN NaN \n",
"4172 NaN NaN NaN \n",
"4173 NaN NaN NaN \n",
"16436 NaN NaN NaN \n",
"16437 NaN NaN NaN \n",
"\n",
" total_deposits association \n",
"4167 NaN NaN \n",
"4168 NaN NaN \n",
"4169 NaN NaN \n",
"4170 NaN NaN \n",
"4171 NaN NaN \n",
"4172 NaN NaN \n",
"4173 NaN NaN \n",
"16436 NaN NaN \n",
"16437 NaN NaN "
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df[roar_df.eprintid.isin(['4612', '4649'])]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>openaire_id</th>\n",
" <th>re3data_id</th>\n",
" <th>repository_name</th>\n",
" <th>additional_name</th>\n",
" <th>repository_url</th>\n",
" <th>repository_id</th>\n",
" <th>description</th>\n",
" <th>type</th>\n",
" <th>size</th>\n",
" <th>update_date</th>\n",
" <th>start_date</th>\n",
" <th>end_date</th>\n",
" <th>subject</th>\n",
" <th>mission_statement</th>\n",
" <th>content_type</th>\n",
" <th>provider_type</th>\n",
" <th>keyword</th>\n",
" <th>institution</th>\n",
" <th>policy</th>\n",
" <th>database_access</th>\n",
" <th>database_license</th>\n",
" <th>data_access</th>\n",
" <th>data_license</th>\n",
" <th>data_upload</th>\n",
" <th>data_upload_license</th>\n",
" <th>software</th>\n",
" <th>versioning</th>\n",
" <th>api</th>\n",
" <th>pid_system</th>\n",
" <th>citation_guideline_url</th>\n",
" <th>aid_system</th>\n",
" <th>enhanced_publication</th>\n",
" <th>quality_management</th>\n",
" <th>certificate</th>\n",
" <th>metadata_standard</th>\n",
" <th>syndication</th>\n",
" <th>remarks</th>\n",
" <th>entry_date</th>\n",
" <th>last_update</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1090</th>\n",
" <td>re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f</td>\n",
" <td>r3d100011306</td>\n",
" <td>RESID Database of Protein Modifications</td>\n",
" <td>[]</td>\n",
" <td>https://pir.georgetown.edu/resid/resid.shtml</td>\n",
" <td>[FAIRsharing_doi:10.25504/FAIRsharing.qaszjp, ...</td>\n",
" <td>The RESID Database of Protein Modifications is...</td>\n",
" <td>[disciplinary]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2014</td>\n",
" <td>NaN</td>\n",
" <td>[2 Life Sciences, 201 Basic Biological and Med...</td>\n",
" <td>false</td>\n",
" <td>[Images, Structured text]</td>\n",
" <td>[dataProvider]</td>\n",
" <td>[genomes, life sciences, proteins, proteomes, ...</td>\n",
" <td>[[Georgetown University, Medical Center, [GUMC...</td>\n",
" <td>true</td>\n",
" <td>true</td>\n",
" <td>false</td>\n",
" <td>true</td>\n",
" <td>true</td>\n",
" <td>true</td>\n",
" <td>false</td>\n",
" <td>true</td>\n",
" <td>yes</td>\n",
" <td>true</td>\n",
" <td>true</td>\n",
" <td>true</td>\n",
" <td>true</td>\n",
" <td>yes</td>\n",
" <td>unknown</td>\n",
" <td>false</td>\n",
" <td>false</td>\n",
" <td>false</td>\n",
" <td>RESID is covered by Thomson Reuters Data Citat...</td>\n",
" <td>2014-12-05</td>\n",
" <td>2019-01-17</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" openaire_id re3data_id \\\n",
"1090 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f r3d100011306 \n",
"\n",
" repository_name additional_name \\\n",
"1090 RESID Database of Protein Modifications [] \n",
"\n",
" repository_url \\\n",
"1090 https://pir.georgetown.edu/resid/resid.shtml \n",
"\n",
" repository_id \\\n",
"1090 [FAIRsharing_doi:10.25504/FAIRsharing.qaszjp, ... \n",
"\n",
" description type size \\\n",
"1090 The RESID Database of Protein Modifications is... [disciplinary] NaN \n",
"\n",
" update_date start_date end_date \\\n",
"1090 NaN 2014 NaN \n",
"\n",
" subject mission_statement \\\n",
"1090 [2 Life Sciences, 201 Basic Biological and Med... false \n",
"\n",
" content_type provider_type \\\n",
"1090 [Images, Structured text] [dataProvider] \n",
"\n",
" keyword \\\n",
"1090 [genomes, life sciences, proteins, proteomes, ... \n",
"\n",
" institution policy \\\n",
"1090 [[Georgetown University, Medical Center, [GUMC... true \n",
"\n",
" database_access database_license data_access data_license data_upload \\\n",
"1090 true false true true true \n",
"\n",
" data_upload_license software versioning api pid_system \\\n",
"1090 false true yes true true \n",
"\n",
" citation_guideline_url aid_system enhanced_publication \\\n",
"1090 true true yes \n",
"\n",
" quality_management certificate metadata_standard syndication \\\n",
"1090 unknown false false false \n",
"\n",
" remarks entry_date \\\n",
"1090 RESID is covered by Thomson Reuters Data Citat... 2014-12-05 \n",
"\n",
" last_update \n",
"1090 2019-01-17 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"re3data_df[re3data_df.re3data_id == 'r3d100011306']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}