{ "cells": [ { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "from matplotlib_venn import venn2, venn2_circles\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dedup_idduplicate_idoriginal_idnamesource
0dedup::29a83a8a9641bb860a679d7e5ba52d261417414174OHIO Open Library | Ohio University Researchroar
1dedup::000871c1fc726f0b52dc86a4eeb027de46124612IIT Bombay Institutional Repositoryroar
2dedup::000871c1fc726f0b52dc86a4eeb027de46494649IIT Bombay Institutional Repositoryroar
3dedup::001e6d882e54c780ce269d3c46997287re3data_____::4af9fe2bb93511a5e0f0c39e94d6557fr3d100011306RESID Database of Protein Modificationsre3data
4dedup::001e6d882e54c780ce269d3c46997287https://fairsharing.org/10.25504/FAIRsharing.q...https://fairsharing.org/10.25504/FAIRsharing.q...RESID Database of Protein ModificationsFAIRsharing
5dedup::0023a1e3447fdb31836536cc903f1310opendoar____::c6f798b844366ccd65d99bc7f31e0e023410erucu: electronic repository of the ukrainian ...OpenDOAR
6dedup::0023a1e3447fdb31836536cc903f13101001310013ErUCU: Electronic repository of the Ukrainian ...roar
7dedup::003ab6b40af9b488decea7c582d150a2https://fairsharing.org/10.25504/FAIRsharing.d...https://fairsharing.org/10.25504/FAIRsharing.d...SynapseFAIRsharing
8dedup::003ab6b40af9b488decea7c582d150a2re3data_____::cafc5d99b7c187e24b40d958a16a91f1r3d100011894Synapsere3data
9dedup::0064f599ed0adb5870a5b3ffe438e4851603416034Giresun University Institutional Repositoryroar
\n", "
" ], "text/plain": [ " dedup_id \\\n", "0 dedup::29a83a8a9641bb860a679d7e5ba52d26 \n", "1 dedup::000871c1fc726f0b52dc86a4eeb027de \n", "2 dedup::000871c1fc726f0b52dc86a4eeb027de \n", "3 dedup::001e6d882e54c780ce269d3c46997287 \n", "4 dedup::001e6d882e54c780ce269d3c46997287 \n", "5 dedup::0023a1e3447fdb31836536cc903f1310 \n", "6 dedup::0023a1e3447fdb31836536cc903f1310 \n", "7 dedup::003ab6b40af9b488decea7c582d150a2 \n", "8 dedup::003ab6b40af9b488decea7c582d150a2 \n", "9 dedup::0064f599ed0adb5870a5b3ffe438e485 \n", "\n", " duplicate_id \\\n", "0 14174 \n", "1 4612 \n", "2 4649 \n", "3 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f \n", "4 https://fairsharing.org/10.25504/FAIRsharing.q... \n", "5 opendoar____::c6f798b844366ccd65d99bc7f31e0e02 \n", "6 10013 \n", "7 https://fairsharing.org/10.25504/FAIRsharing.d... \n", "8 re3data_____::cafc5d99b7c187e24b40d958a16a91f1 \n", "9 16034 \n", "\n", " original_id \\\n", "0 14174 \n", "1 4612 \n", "2 4649 \n", "3 r3d100011306 \n", "4 https://fairsharing.org/10.25504/FAIRsharing.q... \n", "5 3410 \n", "6 10013 \n", "7 https://fairsharing.org/10.25504/FAIRsharing.d... \n", "8 r3d100011894 \n", "9 16034 \n", "\n", " name source \n", "0 OHIO Open Library | Ohio University Research roar \n", "1 IIT Bombay Institutional Repository roar \n", "2 IIT Bombay Institutional Repository roar \n", "3 RESID Database of Protein Modifications re3data \n", "4 RESID Database of Protein Modifications FAIRsharing \n", "5 erucu: electronic repository of the ukrainian ... OpenDOAR \n", "6 ErUCU: Electronic repository of the Ukrainian ... roar \n", "7 Synapse FAIRsharing \n", "8 Synapse re3data \n", "9 Giresun University Institutional Repository roar " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_09 = pd.read_csv('../data/interim/ds_dedup09.csv', sep=';')\n", "df_09.head(10)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dedup_idduplicate_idoriginal_idnamesource
5115dedup::ffeed84c7cb1ae7bf4ec4bd78275bb9811261126RIT Digital Media Libraryroar
5116dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98opendoar____::443cb001c138b2561a0d90720d6ce111648rit digital media libraryOpenDOAR
\n", "
" ], "text/plain": [ " dedup_id \\\n", "5115 dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 \n", "5116 dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 \n", "\n", " duplicate_id original_id \\\n", "5115 1126 1126 \n", "5116 opendoar____::443cb001c138b2561a0d90720d6ce111 648 \n", "\n", " name source \n", "5115 RIT Digital Media Library roar \n", "5116 rit digital media library OpenDOAR " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_09[df_09.dedup_id == 'dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98']" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dedup_idduplicate_idoriginal_idnamesource
40dedup::01846ae470651e97d2f73fce979406a9opendoar____::b4d6f2b565ca0eef1f9245403aac366a7668digital commons at michigan state university c...OpenDOAR
\n", "
" ], "text/plain": [ " dedup_id \\\n", "40 dedup::01846ae470651e97d2f73fce979406a9 \n", "\n", " duplicate_id original_id \\\n", "40 opendoar____::b4d6f2b565ca0eef1f9245403aac366a 7668 \n", "\n", " name source \n", "40 digital commons at michigan state university c... OpenDOAR " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_09[df_09.dedup_id == 'dedup::01846ae470651e97d2f73fce979406a9']" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "re3data_df = pd.read_csv('../data/raw/re3data.tsv', delimiter='\\t', \n", " converters={'subject': ast.literal_eval,\n", " 'keyword': ast.literal_eval,\n", " 'additional_name': ast.literal_eval,\n", " 'repository_id': ast.literal_eval,\n", " 'type': ast.literal_eval,\n", " 'content_type': ast.literal_eval,\n", " 'provider_type': ast.literal_eval,\n", " 'institution': ast.literal_eval\n", " })" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "opendoar_df = pd.read_csv('../data/raw/openDoar.tsv', delimiter='\\t',\n", " converters={'subject': ast.literal_eval,\n", " 'additional_name': ast.literal_eval,\n", " 'opendoar_id': ast.literal_eval,\n", " 'content_type': ast.literal_eval,\n", " 'institution': ast.literal_eval\n", " })" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "fairsharing_df = pd.read_csv('../data/raw/FAIRsharingDBrec_summary20210304.csv', \n", " delimiter='|', header=0,\n", " names=['full_name', 'short_name', 'fs_url', 'url', 'countries', 'subjects'])" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dedup_idduplicate_idoriginal_idnamesource
0dedup::29a83a8a9641bb860a679d7e5ba52d261417414174OHIO Open Library | Ohio University Researchroar
1dedup::000871c1fc726f0b52dc86a4eeb027de46124612IIT Bombay Institutional Repositoryroar
2dedup::000871c1fc726f0b52dc86a4eeb027de46494649IIT Bombay Institutional Repositoryroar
3dedup::001e6d882e54c780ce269d3c46997287re3data_____::4af9fe2bb93511a5e0f0c39e94d6557fr3d100011306RESID Database of Protein Modificationsre3data
4dedup::001e6d882e54c780ce269d3c46997287https://fairsharing.org/10.25504/FAIRsharing.q...https://fairsharing.org/10.25504/FAIRsharing.q...RESID Database of Protein ModificationsFAIRsharing
\n", "
" ], "text/plain": [ " dedup_id \\\n", "0 dedup::29a83a8a9641bb860a679d7e5ba52d26 \n", "1 dedup::000871c1fc726f0b52dc86a4eeb027de \n", "2 dedup::000871c1fc726f0b52dc86a4eeb027de \n", "3 dedup::001e6d882e54c780ce269d3c46997287 \n", "4 dedup::001e6d882e54c780ce269d3c46997287 \n", "\n", " duplicate_id \\\n", "0 14174 \n", "1 4612 \n", "2 4649 \n", "3 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f \n", "4 https://fairsharing.org/10.25504/FAIRsharing.q... \n", "\n", " original_id \\\n", "0 14174 \n", "1 4612 \n", "2 4649 \n", "3 r3d100011306 \n", "4 https://fairsharing.org/10.25504/FAIRsharing.q... \n", "\n", " name source \n", "0 OHIO Open Library | Ohio University Research roar \n", "1 IIT Bombay Institutional Repository roar \n", "2 IIT Bombay Institutional Repository roar \n", "3 RESID Database of Protein Modifications re3data \n", "4 RESID Database of Protein Modifications FAIRsharing " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_09.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duplicate_idoriginal_idnamesource
dedup_id
dedup::000871c1fc726f0b52dc86a4eeb027de[4612, 4649][4612, 4649][IIT Bombay Institutional Repository, IIT Bomb...[roar, roar]
dedup::001e6d882e54c780ce269d3c46997287[re3data_____::4af9fe2bb93511a5e0f0c39e94d6557...[r3d100011306, https://fairsharing.org/10.2550...[RESID Database of Protein Modifications, RESI...[re3data, FAIRsharing]
dedup::0023a1e3447fdb31836536cc903f1310[opendoar____::c6f798b844366ccd65d99bc7f31e0e0...[3410, 10013][erucu: electronic repository of the ukrainian...[OpenDOAR, roar]
dedup::003ab6b40af9b488decea7c582d150a2[https://fairsharing.org/10.25504/FAIRsharing....[https://fairsharing.org/10.25504/FAIRsharing....[Synapse, Synapse][FAIRsharing, re3data]
dedup::0064f599ed0adb5870a5b3ffe438e485[16034, opendoar____::d1f157379ea7e51d4a8c07af...[16034, 9647][Giresun University Institutional Repository, ...[roar, OpenDOAR]
...............
dedup::ff49cc40a8890e6a60f40ff3026d2730[1333, opendoar____::2bd7f907b7f5b6bbd91822c0c...[1333, 1389][UnissResearch, unissresearch][roar, OpenDOAR]
dedup::ff4d70de478038c72282b7e4af1d4260[opendoar____::95a0810a93a87065bf7b28490817e9e...[9752, 16367][european xfel publication database, European ...[OpenDOAR, roar]
dedup::ff826ce6ee85809389f18a5fafe72366[opendoar____::62e7f2e090fe150ef8deb4466fdc81b...[3601, 2608][electronic odessa national economic universit...[OpenDOAR, OpenDOAR]
dedup::ffbd6cbb019a1413183c8d08f2929307[3108, opendoar____::ff7d0f525b3be596a51fb9194...[3108, 1912][Fotografía Sobre España en el Siglo XIX, foto...[roar, OpenDOAR]
dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98[1126, opendoar____::443cb001c138b2561a0d90720...[1126, 648][RIT Digital Media Library, rit digital media ...[roar, OpenDOAR]
\n", "

2453 rows × 4 columns

\n", "
" ], "text/plain": [ " duplicate_id \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n", "dedup::001e6d882e54c780ce269d3c46997287 [re3data_____::4af9fe2bb93511a5e0f0c39e94d6557... \n", "dedup::0023a1e3447fdb31836536cc903f1310 [opendoar____::c6f798b844366ccd65d99bc7f31e0e0... \n", "dedup::003ab6b40af9b488decea7c582d150a2 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [16034, opendoar____::d1f157379ea7e51d4a8c07af... \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [1333, opendoar____::2bd7f907b7f5b6bbd91822c0c... \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [opendoar____::95a0810a93a87065bf7b28490817e9e... \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [opendoar____::62e7f2e090fe150ef8deb4466fdc81b... \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [3108, opendoar____::ff7d0f525b3be596a51fb9194... \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [1126, opendoar____::443cb001c138b2561a0d90720... \n", "\n", " original_id \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n", "dedup::001e6d882e54c780ce269d3c46997287 [r3d100011306, https://fairsharing.org/10.2550... \n", "dedup::0023a1e3447fdb31836536cc903f1310 [3410, 10013] \n", "dedup::003ab6b40af9b488decea7c582d150a2 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [16034, 9647] \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [1333, 1389] \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [9752, 16367] \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [3601, 2608] \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [3108, 1912] \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [1126, 648] \n", "\n", " name \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [IIT Bombay Institutional Repository, IIT Bomb... \n", "dedup::001e6d882e54c780ce269d3c46997287 [RESID Database of Protein Modifications, RESI... \n", "dedup::0023a1e3447fdb31836536cc903f1310 [erucu: electronic repository of the ukrainian... \n", "dedup::003ab6b40af9b488decea7c582d150a2 [Synapse, Synapse] \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [Giresun University Institutional Repository, ... \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [UnissResearch, unissresearch] \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [european xfel publication database, European ... \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [electronic odessa national economic universit... \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [Fotografía Sobre España en el Siglo XIX, foto... \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [RIT Digital Media Library, rit digital media ... \n", "\n", " source \n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n", "dedup::001e6d882e54c780ce269d3c46997287 [re3data, FAIRsharing] \n", "dedup::0023a1e3447fdb31836536cc903f1310 [OpenDOAR, roar] \n", "dedup::003ab6b40af9b488decea7c582d150a2 [FAIRsharing, re3data] \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [roar, OpenDOAR] \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [roar, OpenDOAR] \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [OpenDOAR, roar] \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [OpenDOAR, OpenDOAR] \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [roar, OpenDOAR] \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [roar, OpenDOAR] \n", "\n", "[2453 rows x 4 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dup = df_09.groupby('dedup_id').aggregate(list)\n", "dup" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duplicate_idoriginal_idnamesource
dedup_id
dedup::01846ae470651e97d2f73fce979406a9[opendoar____::b4d6f2b565ca0eef1f9245403aac366a][7668][digital commons at michigan state university ...[OpenDOAR]
dedup::022036087426786cfd0f7f41fa7a2665[https://fairsharing.org/10.25504/FAIRsharing....[https://fairsharing.org/10.25504/FAIRsharing....[World Data Center for Climate at DRKZ][FAIRsharing]
dedup::07e8b472e1e4af17a6b20ce083baf29f[15036][15036][MiCISAN][roar]
dedup::0894634a3244e3050d8057a453e17e57[https://fairsharing.org/10.25504/FAIRsharing....[https://fairsharing.org/10.25504/FAIRsharing....[European Variation Archive][FAIRsharing]
dedup::0a54b19a13b6712dc04d1b49215423d8[opendoar____::d34ab169b70c9dcd35e62896010cd9ff][377][yale medicine thesis digital library][OpenDOAR]
...............
dedup::f8306c8f16096b6d944799f4d427a976[re3data_____::574b553c6c374d597d2068ab2b117889][r3d100012041][Canadian Disaster Database][re3data]
dedup::f9d8e2daaa9144310b66bf948e50d656[re3data_____::95014789f83d7611ebfddace19d0523a][r3d100011045][Index to Marine & Lacustrine Geological Samples][re3data]
dedup::fcdbc4f504a15df8f78da88ee72fad32[opendoar____::9f96f36b7aae3b1ff847c26ac94c604e][4979][university of minnesota law school][OpenDOAR]
dedup::fcfe9c770eb9372e6961a17f7eaffd5f[4637][4637][Simon Fraser University Institutional Reposit...[roar]
dedup::fe73f687e5bc5280214e0486b273a5f9[330][330][DigitalCommons@Fort Lewis College: Scholarshi...[roar]
\n", "

109 rows × 4 columns

\n", "
" ], "text/plain": [ " duplicate_id \\\n", "dedup_id \n", "dedup::01846ae470651e97d2f73fce979406a9 [opendoar____::b4d6f2b565ca0eef1f9245403aac366a] \n", "dedup::022036087426786cfd0f7f41fa7a2665 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::07e8b472e1e4af17a6b20ce083baf29f [15036] \n", "dedup::0894634a3244e3050d8057a453e17e57 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0a54b19a13b6712dc04d1b49215423d8 [opendoar____::d34ab169b70c9dcd35e62896010cd9ff] \n", "... ... \n", "dedup::f8306c8f16096b6d944799f4d427a976 [re3data_____::574b553c6c374d597d2068ab2b117889] \n", "dedup::f9d8e2daaa9144310b66bf948e50d656 [re3data_____::95014789f83d7611ebfddace19d0523a] \n", "dedup::fcdbc4f504a15df8f78da88ee72fad32 [opendoar____::9f96f36b7aae3b1ff847c26ac94c604e] \n", "dedup::fcfe9c770eb9372e6961a17f7eaffd5f [4637] \n", "dedup::fe73f687e5bc5280214e0486b273a5f9 [330] \n", "\n", " original_id \\\n", "dedup_id \n", "dedup::01846ae470651e97d2f73fce979406a9 [7668] \n", "dedup::022036087426786cfd0f7f41fa7a2665 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::07e8b472e1e4af17a6b20ce083baf29f [15036] \n", "dedup::0894634a3244e3050d8057a453e17e57 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0a54b19a13b6712dc04d1b49215423d8 [377] \n", "... ... \n", "dedup::f8306c8f16096b6d944799f4d427a976 [r3d100012041] \n", "dedup::f9d8e2daaa9144310b66bf948e50d656 [r3d100011045] \n", "dedup::fcdbc4f504a15df8f78da88ee72fad32 [4979] \n", "dedup::fcfe9c770eb9372e6961a17f7eaffd5f [4637] \n", "dedup::fe73f687e5bc5280214e0486b273a5f9 [330] \n", "\n", " name \\\n", "dedup_id \n", "dedup::01846ae470651e97d2f73fce979406a9 [digital commons at michigan state university ... \n", "dedup::022036087426786cfd0f7f41fa7a2665 [World Data Center for Climate at DRKZ] \n", "dedup::07e8b472e1e4af17a6b20ce083baf29f [MiCISAN] \n", "dedup::0894634a3244e3050d8057a453e17e57 [European Variation Archive] \n", "dedup::0a54b19a13b6712dc04d1b49215423d8 [yale medicine thesis digital library] \n", "... ... \n", "dedup::f8306c8f16096b6d944799f4d427a976 [Canadian Disaster Database] \n", "dedup::f9d8e2daaa9144310b66bf948e50d656 [Index to Marine & Lacustrine Geological Samples] \n", "dedup::fcdbc4f504a15df8f78da88ee72fad32 [university of minnesota law school] \n", "dedup::fcfe9c770eb9372e6961a17f7eaffd5f [Simon Fraser University Institutional Reposit... \n", "dedup::fe73f687e5bc5280214e0486b273a5f9 [DigitalCommons@Fort Lewis College: Scholarshi... \n", "\n", " source \n", "dedup_id \n", "dedup::01846ae470651e97d2f73fce979406a9 [OpenDOAR] \n", "dedup::022036087426786cfd0f7f41fa7a2665 [FAIRsharing] \n", "dedup::07e8b472e1e4af17a6b20ce083baf29f [roar] \n", "dedup::0894634a3244e3050d8057a453e17e57 [FAIRsharing] \n", "dedup::0a54b19a13b6712dc04d1b49215423d8 [OpenDOAR] \n", "... ... \n", "dedup::f8306c8f16096b6d944799f4d427a976 [re3data] \n", "dedup::f9d8e2daaa9144310b66bf948e50d656 [re3data] \n", "dedup::fcdbc4f504a15df8f78da88ee72fad32 [OpenDOAR] \n", "dedup::fcfe9c770eb9372e6961a17f7eaffd5f [roar] \n", "dedup::fe73f687e5bc5280214e0486b273a5f9 [roar] \n", "\n", "[109 rows x 4 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dup[dup.source.str.len() == 1]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([list(['r3d100013204', 'r3d100013458', 'r3d100012807', 'r3d100012808', 'r3d100012806', 'r3d100012805']),\n", " list(['243', '5702', '5715', '5694', '5689', '5658', '5710', '5750', '5721', '5704']),\n", " list(['2738', '4991', '2727', '2729', '2724', '2728', '2740', '174']),\n", " list(['19', '8', '7', '11', '10', '13', '6', '12', '20', '15', '9', '5', '14', '16'])],\n", " dtype=object)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dup[dup.source.str.len() >= 6].original_id.values" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0[Harvard University, Institute for Quantitativ...[Ontario Council of University Libraries, [CBU...[Scholars Portal Dataverse, [], CAN, [general]...[The Dataverse Project, [], AAA, [technical], ...[University of Ottawa, Library, [Université d'...
1[Dataverse Project, [], AAA, [technical], non-...[Harvard University, Institute for Quantitave ...[Ontario Council of University Libraries, [CBU...[University of Toronto, Libraries, [], CAN, [g...None
2[Harvard University, Institute for Quantitativ...[Ontario Council of University Libraries, [CBU...[Scholars Portal Dataverse, [dataverse@scholar...[The Dataverse Project, [], AAA, [technical], ...[University of Windsor, [], CAN, [general], no...
3[Harvard University, Institute for Quantitativ...[Ontario Council of University Libraries, [CBU...[Scholars Portal Dataverse, [], CAN, [general]...[The Dataverse Project, [], AAA, [technical], ...[University of Waterloo, [], CAN, [general], n...
4[University of Victoria, [UVic], CAN, [general...[University of Victoria, Libraries, [], CAN, [...NoneNoneNone
5[The University of British Columbia, [], CAN, ...[University of British Columbia, Library, [], ...NoneNoneNone
\n", "
" ], "text/plain": [ " 0 \\\n", "0 [Harvard University, Institute for Quantitativ... \n", "1 [Dataverse Project, [], AAA, [technical], non-... \n", "2 [Harvard University, Institute for Quantitativ... \n", "3 [Harvard University, Institute for Quantitativ... \n", "4 [University of Victoria, [UVic], CAN, [general... \n", "5 [The University of British Columbia, [], CAN, ... \n", "\n", " 1 \\\n", "0 [Ontario Council of University Libraries, [CBU... \n", "1 [Harvard University, Institute for Quantitave ... \n", "2 [Ontario Council of University Libraries, [CBU... \n", "3 [Ontario Council of University Libraries, [CBU... \n", "4 [University of Victoria, Libraries, [], CAN, [... \n", "5 [University of British Columbia, Library, [], ... \n", "\n", " 2 \\\n", "0 [Scholars Portal Dataverse, [], CAN, [general]... \n", "1 [Ontario Council of University Libraries, [CBU... \n", "2 [Scholars Portal Dataverse, [dataverse@scholar... \n", "3 [Scholars Portal Dataverse, [], CAN, [general]... \n", "4 None \n", "5 None \n", "\n", " 3 \\\n", "0 [The Dataverse Project, [], AAA, [technical], ... \n", "1 [University of Toronto, Libraries, [], CAN, [g... \n", "2 [The Dataverse Project, [], AAA, [technical], ... \n", "3 [The Dataverse Project, [], AAA, [technical], ... \n", "4 None \n", "5 None \n", "\n", " 4 \n", "0 [University of Ottawa, Library, [Université d'... \n", "1 None \n", "2 [University of Windsor, [], CAN, [general], no... \n", "3 [University of Waterloo, [], CAN, [general], n... \n", "4 None \n", "5 None " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame(re3data_df[re3data_df.re3data_id.isin(['r3d100013204', 'r3d100013458', 'r3d100012807', 'r3d100012808', 'r3d100012806', 'r3d100012805'])].institution.to_list())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
4167461228archive1380NaNNaNdisk0/00/00/46/122012-01-08 03:17:022012-04-16 10:53:042012-01-08 03:17:02institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://dspace.library.iitb.ac.in/jspui/IIT Bombay Institutional Repositoryhttp://dspace.library.iitb.ac.in/oai/requestNaNhttp://dspace.library.iitb.ac.in/xmlui/feed/at...NaNNaNTRUETRUETRUEIIT Bombayhttp://www.iitb.ac.ininMumbai19.13372.9166dspacegeoname_2_INotherTA2011-12-15 09:01:35NaNNaN000990,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,9...NaNNaNNaNNaNcelestial4790NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41684612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41694612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTHNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41704612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTJNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41714612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTKNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41724612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41734612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTPNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1643646498archive1380NaNNaNdisk0/00/00/46/492012-02-05 13:57:012012-04-16 10:39:582012-02-05 13:57:01institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://dspace.library.iitb.ac.in/jspui/IIT Bombay Institutional Repositoryhttp://dspace.library.iitb.ac.in/oaiNaNhttp://dspace.library.iitb.ac.in/xmlui/feed/rs...NaNNaNTRUETRUEFALSEIIT Bombayhttp://www.iitb.ac.ininMumbai19.13372.9166dspacegeoname_2_INotherT12012-01-05 12:09:37NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial4789NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
164374649NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTANaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "4167 4612 28 archive 1380 NaN NaN \n", "4168 4612 NaN NaN NaN NaN NaN \n", "4169 4612 NaN NaN NaN NaN NaN \n", "4170 4612 NaN NaN NaN NaN NaN \n", "4171 4612 NaN NaN NaN NaN NaN \n", "4172 4612 NaN NaN NaN NaN NaN \n", "4173 4612 NaN NaN NaN NaN NaN \n", "16436 4649 8 archive 1380 NaN NaN \n", "16437 4649 NaN NaN NaN NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "4167 disk0/00/00/46/12 2012-01-08 03:17:02 2012-04-16 10:53:04 \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 disk0/00/00/46/49 2012-02-05 13:57:01 2012-04-16 10:39:58 \n", "16437 NaN NaN NaN \n", "\n", " status_changed type succeeds commentary \\\n", "4167 2012-01-08 03:17:02 institutional NaN NaN \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 2012-02-05 13:57:01 institutional NaN NaN \n", "16437 NaN NaN NaN NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "4167 show NaN NaN NaN NaN \n", "4168 NaN NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN NaN \n", "16436 show NaN NaN NaN NaN \n", "16437 NaN NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type item_issues_description \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " home_page \\\n", "4167 http://dspace.library.iitb.ac.in/jspui/ \n", "4168 NaN \n", "4169 NaN \n", "4170 NaN \n", "4171 NaN \n", "4172 NaN \n", "4173 NaN \n", "16436 http://dspace.library.iitb.ac.in/jspui/ \n", "16437 NaN \n", "\n", " title \\\n", "4167 IIT Bombay Institutional Repository \n", "4168 NaN \n", "4169 NaN \n", "4170 NaN \n", "4171 NaN \n", "4172 NaN \n", "4173 NaN \n", "16436 IIT Bombay Institutional Repository \n", "16437 NaN \n", "\n", " oai_pmh sword_endpoint \\\n", "4167 http://dspace.library.iitb.ac.in/oai/request NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 http://dspace.library.iitb.ac.in/oai NaN \n", "16437 NaN NaN \n", "\n", " rss_feed twitter_feed \\\n", "4167 http://dspace.library.iitb.ac.in/xmlui/feed/at... NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 http://dspace.library.iitb.ac.in/xmlui/feed/rs... NaN \n", "16437 NaN NaN \n", "\n", " description fulltext open_access mandate organisation_title \\\n", "4167 NaN TRUE TRUE TRUE IIT Bombay \n", "4168 NaN NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN NaN \n", "16436 NaN TRUE TRUE FALSE IIT Bombay \n", "16437 NaN NaN NaN NaN NaN \n", "\n", " organisation_home_page location_country location_city location_latitude \\\n", "4167 http://www.iitb.ac.in in Mumbai 19.133 \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 http://www.iitb.ac.in in Mumbai 19.133 \n", "16437 NaN NaN NaN NaN \n", "\n", " location_longitude software geoname version subjects \\\n", "4167 72.9166 dspace geoname_2_IN other TA \n", "4168 NaN NaN NaN NaN TD \n", "4169 NaN NaN NaN NaN TH \n", "4170 NaN NaN NaN NaN TJ \n", "4171 NaN NaN NaN NaN TK \n", "4172 NaN NaN NaN NaN TN \n", "4173 NaN NaN NaN NaN TP \n", "16436 72.9166 dspace geoname_2_IN other T1 \n", "16437 NaN NaN NaN NaN TA \n", "\n", " date note suggestions activity_low activity_medium \\\n", "4167 2011-12-15 09:01:35 NaN NaN 0 0 \n", "4168 NaN NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN NaN \n", "16436 2012-01-05 12:09:37 NaN NaN NaN NaN \n", "16437 NaN NaN NaN NaN NaN \n", "\n", " activity_high recordcount \\\n", "4167 0 99 \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 NaN NaN \n", "16437 NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "4167 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,9... NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 NaN NaN \n", "16437 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n", "4167 NaN NaN NaN celestial \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 NaN NaN NaN celestial \n", "16437 NaN NaN NaN NaN \n", "\n", " registry_id submit_to submitted_to_name submitted_to_done \\\n", "4167 4790 NaN NaN NaN \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 4789 NaN NaN NaN \n", "16437 NaN NaN NaN NaN \n", "\n", " webometrics_rank webometrics_size webometrics_visibility \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " webometrics_rich_files webometrics_scholar monthly_deposits \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " total_deposits association \n", "4167 NaN NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 NaN NaN \n", "16437 NaN NaN " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df[roar_df.eprintid.isin(['4612', '4649'])]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
openaire_idre3data_idrepository_nameadditional_namerepository_urlrepository_iddescriptiontypesizeupdate_datestart_dateend_datesubjectmission_statementcontent_typeprovider_typekeywordinstitutionpolicydatabase_accessdatabase_licensedata_accessdata_licensedata_uploaddata_upload_licensesoftwareversioningapipid_systemcitation_guideline_urlaid_systemenhanced_publicationquality_managementcertificatemetadata_standardsyndicationremarksentry_datelast_update
1090re3data_____::4af9fe2bb93511a5e0f0c39e94d6557fr3d100011306RESID Database of Protein Modifications[]https://pir.georgetown.edu/resid/resid.shtml[FAIRsharing_doi:10.25504/FAIRsharing.qaszjp, ...The RESID Database of Protein Modifications is...[disciplinary]NaNNaN2014NaN[2 Life Sciences, 201 Basic Biological and Med...false[Images, Structured text][dataProvider][genomes, life sciences, proteins, proteomes, ...[[Georgetown University, Medical Center, [GUMC...truetruefalsetruetruetruefalsetrueyestruetruetruetrueyesunknownfalsefalsefalseRESID is covered by Thomson Reuters Data Citat...2014-12-052019-01-17
\n", "
" ], "text/plain": [ " openaire_id re3data_id \\\n", "1090 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f r3d100011306 \n", "\n", " repository_name additional_name \\\n", "1090 RESID Database of Protein Modifications [] \n", "\n", " repository_url \\\n", "1090 https://pir.georgetown.edu/resid/resid.shtml \n", "\n", " repository_id \\\n", "1090 [FAIRsharing_doi:10.25504/FAIRsharing.qaszjp, ... \n", "\n", " description type size \\\n", "1090 The RESID Database of Protein Modifications is... [disciplinary] NaN \n", "\n", " update_date start_date end_date \\\n", "1090 NaN 2014 NaN \n", "\n", " subject mission_statement \\\n", "1090 [2 Life Sciences, 201 Basic Biological and Med... false \n", "\n", " content_type provider_type \\\n", "1090 [Images, Structured text] [dataProvider] \n", "\n", " keyword \\\n", "1090 [genomes, life sciences, proteins, proteomes, ... \n", "\n", " institution policy \\\n", "1090 [[Georgetown University, Medical Center, [GUMC... true \n", "\n", " database_access database_license data_access data_license data_upload \\\n", "1090 true false true true true \n", "\n", " data_upload_license software versioning api pid_system \\\n", "1090 false true yes true true \n", "\n", " citation_guideline_url aid_system enhanced_publication \\\n", "1090 true true yes \n", "\n", " quality_management certificate metadata_standard syndication \\\n", "1090 unknown false false false \n", "\n", " remarks entry_date \\\n", "1090 RESID is covered by Thomson Reuters Data Citat... 2014-12-05 \n", "\n", " last_update \n", "1090 2019-01-17 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df[re3data_df.re3data_id == 'r3d100011306']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }