{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "from matplotlib_venn import venn2, venn2_circles\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dedup_idduplicate_idoriginal_idnamesource
count45134513451345134513
unique21454513426138944
topdedup::75e33da9b103b7b91dcd8da0abe1354bhttps://fairsharing.org/bsg-d0015203860UPN JATIM REPOSITORYroar
freq51241933
\n", "
" ], "text/plain": [ " dedup_id \\\n", "count 4513 \n", "unique 2145 \n", "top dedup::75e33da9b103b7b91dcd8da0abe1354b \n", "freq 5 \n", "\n", " duplicate_id original_id name \\\n", "count 4513 4513 4513 \n", "unique 4513 4261 3894 \n", "top https://fairsharing.org/bsg-d001520 3860 UPN JATIM REPOSITORY \n", "freq 1 2 4 \n", "\n", " source \n", "count 4513 \n", "unique 4 \n", "top roar \n", "freq 1933 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Deduplication table, read with header=None so the five column names are supplied explicitly.\n",
 "# dtype=str keeps every identifier as text, so numeric-looking IDs such as 10453 are never\n",
 "# inferred as numbers (the ROAR export below is loaded with the same string dtype).\n",
 "df_09 = pd.read_csv(\n",
 "    '../data/interim/ds_dedup09.csv',\n",
 "    sep=';',\n",
 "    quotechar='\"',\n",
 "    header=None,\n",
 "    names=['dedup_id', 'duplicate_id', 'original_id', 'name', 'source'],\n",
 "    dtype=str,\n",
 ")\n",
 "df_09.describe()"
] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
 "# re3data export (tab-separated). The listed columns are passed through ast.literal_eval\n",
 "# instead of being kept as raw strings.\n",
 "re3data_df = pd.read_csv(\n",
 "    '../data/raw/re3data.tsv',\n",
 "    delimiter='\\t',\n",
 "    converters=dict.fromkeys(\n",
 "        ['subject', 'keyword', 'additional_name', 'repository_id',\n",
 "         'type', 'content_type', 'provider_type', 'institution'],\n",
 "        ast.literal_eval,\n",
 "    ),\n",
 ")"
] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
 "# OpenDOAR export (tab-separated). The listed columns are passed through ast.literal_eval\n",
 "# instead of being kept as raw strings.\n",
 "opendoar_df = pd.read_csv(\n",
 "    '../data/raw/openDoar.tsv',\n",
 "    delimiter='\\t',\n",
 "    converters=dict.fromkeys(\n",
 "        ['subject', 'additional_name', 'opendoar_id', 'content_type', 'institution'],\n",
 "        ast.literal_eval,\n",
 "    ),\n",
 ")"
] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [
 "# ROAR export: every column is read as a string.\n",
 "roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype=str)"
] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [
 "# FAIRsharing summary (pipe-separated). header=0 together with names= replaces the file's\n",
 "# own header row with the column names given here.\n",
 "fairsharing_df = pd.read_csv(\n",
 "    '../data/raw/FAIRsharingDBrec_summary20210304.csv',\n",
 "    delimiter='|',\n",
 "    header=0,\n",
 "    names=['full_name', 'short_name', 'fs_url', 'url', 'countries', 'subjects'],\n",
 ")"
] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dedup_idduplicate_idoriginal_idnamesource
0dedup::252773ebafcbbac75238b419d964068ehttps://fairsharing.org/bsg-d001520https://fairsharing.org/bsg-d001520ACTRIS Data CentreFAIRsharing
1dedup::860320be12a1c050cd7731794e231bd3opendoar____::2290a7385ed77cc5592dc2153229f0821064oxford university research archiveOpenDOAR
2dedup::1aa7a8773e6a7fdacbcedf9999009a38opendoar____::191f8f858acda435ae0daf994e2a72c28648digital commons@georgia southernOpenDOAR
3dedup::4801441f041958afaca324c43c40787b1045310453MCStorroar
4dedup::2841194266115ac1cc04d19630cde46bre3data_____::3afbb2b45a3dd218a5a091ca773cf6c5r3d100011189PRISM: University of Calgary's Digital Repositoryre3data
\n", "
" ], "text/plain": [ " dedup_id \\\n", "0 dedup::252773ebafcbbac75238b419d964068e \n", "1 dedup::860320be12a1c050cd7731794e231bd3 \n", "2 dedup::1aa7a8773e6a7fdacbcedf9999009a38 \n", "3 dedup::4801441f041958afaca324c43c40787b \n", "4 dedup::2841194266115ac1cc04d19630cde46b \n", "\n", " duplicate_id \\\n", "0 https://fairsharing.org/bsg-d001520 \n", "1 opendoar____::2290a7385ed77cc5592dc2153229f082 \n", "2 opendoar____::191f8f858acda435ae0daf994e2a72c2 \n", "3 10453 \n", "4 re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5 \n", "\n", " original_id \\\n", "0 https://fairsharing.org/bsg-d001520 \n", "1 1064 \n", "2 8648 \n", "3 10453 \n", "4 r3d100011189 \n", "\n", " name source \n", "0 ACTRIS Data Centre FAIRsharing \n", "1 oxford university research archive OpenDOAR \n", "2 digital commons@georgia southern OpenDOAR \n", "3 MCStor roar \n", "4 PRISM: University of Calgary's Digital Repository re3data " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Preview the first five rows of the deduplication table.\n",
 "df_09.head(n=5)"
] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duplicate_idoriginal_idnamesourcesource_set
dedup_id
dedup::000871c1fc726f0b52dc86a4eeb027de[4612, 4649][4612, 4649][IIT Bombay Institutional Repository, IIT Bomb...[roar, roar]{roar}
dedup::001e6d882e54c780ce269d3c46997287[re3data_____::4af9fe2bb93511a5e0f0c39e94d6557...[r3d100011306, https://fairsharing.org/10.2550...[RESID Database of Protein Modifications, RESI...[re3data, FAIRsharing]{re3data, FAIRsharing}
dedup::0023a1e3447fdb31836536cc903f1310[opendoar____::c6f798b844366ccd65d99bc7f31e0e0...[3410, 10013][erucu: electronic repository of the ukrainian...[OpenDOAR, roar]{OpenDOAR, roar}
dedup::003ab6b40af9b488decea7c582d150a2[https://fairsharing.org/10.25504/FAIRsharing....[https://fairsharing.org/10.25504/FAIRsharing....[Synapse, Synapse][FAIRsharing, re3data]{re3data, FAIRsharing}
dedup::0064f599ed0adb5870a5b3ffe438e485[16034, opendoar____::d1f157379ea7e51d4a8c07af...[16034, 9647][Giresun University Institutional Repository, ...[roar, OpenDOAR]{OpenDOAR, roar}
..................
dedup::ff49cc40a8890e6a60f40ff3026d2730[1333, opendoar____::2bd7f907b7f5b6bbd91822c0c...[1333, 1389][UnissResearch, unissresearch][roar, OpenDOAR]{OpenDOAR, roar}
dedup::ff4d70de478038c72282b7e4af1d4260[opendoar____::95a0810a93a87065bf7b28490817e9e...[9752, 16367][european xfel publication database, European ...[OpenDOAR, roar]{OpenDOAR, roar}
dedup::ff826ce6ee85809389f18a5fafe72366[opendoar____::62e7f2e090fe150ef8deb4466fdc81b...[3601, 2608][electronic odessa national economic universit...[OpenDOAR, OpenDOAR]{OpenDOAR}
dedup::ffbd6cbb019a1413183c8d08f2929307[3108, opendoar____::ff7d0f525b3be596a51fb9194...[3108, 1912][Fotografía Sobre España en el Siglo XIX, foto...[roar, OpenDOAR]{OpenDOAR, roar}
dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98[1126, opendoar____::443cb001c138b2561a0d90720...[1126, 648][RIT Digital Media Library, rit digital media ...[roar, OpenDOAR]{OpenDOAR, roar}
\n", "

2145 rows × 5 columns

\n", "
" ], "text/plain": [ " duplicate_id \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n", "dedup::001e6d882e54c780ce269d3c46997287 [re3data_____::4af9fe2bb93511a5e0f0c39e94d6557... \n", "dedup::0023a1e3447fdb31836536cc903f1310 [opendoar____::c6f798b844366ccd65d99bc7f31e0e0... \n", "dedup::003ab6b40af9b488decea7c582d150a2 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [16034, opendoar____::d1f157379ea7e51d4a8c07af... \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [1333, opendoar____::2bd7f907b7f5b6bbd91822c0c... \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [opendoar____::95a0810a93a87065bf7b28490817e9e... \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [opendoar____::62e7f2e090fe150ef8deb4466fdc81b... \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [3108, opendoar____::ff7d0f525b3be596a51fb9194... \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [1126, opendoar____::443cb001c138b2561a0d90720... \n", "\n", " original_id \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n", "dedup::001e6d882e54c780ce269d3c46997287 [r3d100011306, https://fairsharing.org/10.2550... \n", "dedup::0023a1e3447fdb31836536cc903f1310 [3410, 10013] \n", "dedup::003ab6b40af9b488decea7c582d150a2 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [16034, 9647] \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [1333, 1389] \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [9752, 16367] \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [3601, 2608] \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [3108, 1912] \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [1126, 648] \n", "\n", " name \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [IIT Bombay Institutional Repository, IIT Bomb... \n", "dedup::001e6d882e54c780ce269d3c46997287 [RESID Database of Protein Modifications, RESI... 
\n", "dedup::0023a1e3447fdb31836536cc903f1310 [erucu: electronic repository of the ukrainian... \n", "dedup::003ab6b40af9b488decea7c582d150a2 [Synapse, Synapse] \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [Giresun University Institutional Repository, ... \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [UnissResearch, unissresearch] \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [european xfel publication database, European ... \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [electronic odessa national economic universit... \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [Fotografía Sobre España en el Siglo XIX, foto... \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [RIT Digital Media Library, rit digital media ... \n", "\n", " source \\\n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n", "dedup::001e6d882e54c780ce269d3c46997287 [re3data, FAIRsharing] \n", "dedup::0023a1e3447fdb31836536cc903f1310 [OpenDOAR, roar] \n", "dedup::003ab6b40af9b488decea7c582d150a2 [FAIRsharing, re3data] \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 [roar, OpenDOAR] \n", "... ... \n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 [roar, OpenDOAR] \n", "dedup::ff4d70de478038c72282b7e4af1d4260 [OpenDOAR, roar] \n", "dedup::ff826ce6ee85809389f18a5fafe72366 [OpenDOAR, OpenDOAR] \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 [roar, OpenDOAR] \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [roar, OpenDOAR] \n", "\n", " source_set \n", "dedup_id \n", "dedup::000871c1fc726f0b52dc86a4eeb027de {roar} \n", "dedup::001e6d882e54c780ce269d3c46997287 {re3data, FAIRsharing} \n", "dedup::0023a1e3447fdb31836536cc903f1310 {OpenDOAR, roar} \n", "dedup::003ab6b40af9b488decea7c582d150a2 {re3data, FAIRsharing} \n", "dedup::0064f599ed0adb5870a5b3ffe438e485 {OpenDOAR, roar} \n", "... ... 
\n", "dedup::ff49cc40a8890e6a60f40ff3026d2730 {OpenDOAR, roar} \n", "dedup::ff4d70de478038c72282b7e4af1d4260 {OpenDOAR, roar} \n", "dedup::ff826ce6ee85809389f18a5fafe72366 {OpenDOAR} \n", "dedup::ffbd6cbb019a1413183c8d08f2929307 {OpenDOAR, roar} \n", "dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 {OpenDOAR, roar} \n", "\n", "[2145 rows x 5 columns]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Collapse df_09 to one row per dedup_id: each remaining column becomes the list of that\n",
 "# group's values, and source_set holds the distinct values of the group's `source` list.\n",
 "dup = (\n",
 "    df_09\n",
 "    .groupby('dedup_id')\n",
 "    .agg(list)\n",
 "    .assign(source_set=lambda grouped: grouped['source'].map(set))\n",
 ")\n",
 "dup"
] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
duplicate_idoriginal_idnamesourcesource_set
dedup_id
dedup::06138bc5af6023646ede0e1f7c1eac75[https://fairsharing.org/10.25504/FAIRsharing....[https://fairsharing.org/10.25504/FAIRsharing....[Crystallography Open Database, Crystallograph...[FAIRsharing, roar, re3data, OpenDOAR]{re3data, OpenDOAR, roar, FAIRsharing}
dedup::0b7e684c89e746c67c9761ce2b65479c[re3data_____::44217da669f17a260c0958a679003a7...[r3d100010423, 375, https://fairsharing.org/10...[Woods Hole Open Access Server, woods hole ope...[re3data, OpenDOAR, FAIRsharing]{re3data, FAIRsharing, OpenDOAR}
dedup::0bb4aec1710521c12ee76289d9440817[re3data_____::eb721a14697a05c477d0ae23830e665...[r3d100012596, 650, 349][Digitale Bibliothek Thüringen, digitale bibli...[re3data, OpenDOAR, roar]{re3data, roar, OpenDOAR}
dedup::139042a4157a773f209847829d80894d[756, re3data_____::a95b34b344dc049963c35997fe...[756, r3d100010690, 1330, 5487][Khazar University Institutional Repository, K...[roar, re3data, OpenDOAR, roar]{re3data, roar, OpenDOAR}
dedup::13d4bfa0321f86f042b34ec79064b316[opendoar____::e3844e186e6eb8736e9f53c0c588952...[9411, 15255, r3d100013135][fordatis, Fordatis, Fordatis][OpenDOAR, roar, re3data]{OpenDOAR, re3data, roar}
..................
dedup::eded0708dfe855304a50029fccf1a677[opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf...[3, r3d100012604, 5509][ams acta, AMS Acta, AMS Acta][OpenDOAR, re3data, roar]{OpenDOAR, roar, re3data}
dedup::ef46a43afd7c7d67e21f4306bb1364e9[opendoar____::1f74a54f39b3123ad272ca0a06e7463...[5870, https://fairsharing.org/10.25504/FAIRsh...[heidata, heiDATA, heiDATA][OpenDOAR, FAIRsharing, re3data]{OpenDOAR, re3data, FAIRsharing}
dedup::f296bb3903d8a84d81c47e6db90764b9[https://fairsharing.org/10.25504/FAIRsharing....[https://fairsharing.org/10.25504/FAIRsharing....[PubChem, PubChem, pubchem][FAIRsharing, re3data, OpenDOAR]{re3data, OpenDOAR, FAIRsharing}
dedup::f3dc6512e46961c363ea402ff218c8fb[re3data_____::b2fc675049cbf485d9abbccf5232a31...[r3d100012538, 10171, https://fairsharing.org/...[DataverseNO, dataverseno, DataverseNO][re3data, OpenDOAR, FAIRsharing]{re3data, FAIRsharing, OpenDOAR}
dedup::f9aa64cbb57131939eda048250f2dbae[re3data_____::063765fa6d6358d62ea2d41dde32d3a...[r3d100012692, https://fairsharing.org/10.2550...[Scholars' Mine, Scholars' Mine, scholars mine][re3data, FAIRsharing, OpenDOAR]{re3data, OpenDOAR, FAIRsharing}
\n", "

65 rows × 5 columns

\n", "
" ], "text/plain": [ " duplicate_id \\\n", "dedup_id \n", "dedup::06138bc5af6023646ede0e1f7c1eac75 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0b7e684c89e746c67c9761ce2b65479c [re3data_____::44217da669f17a260c0958a679003a7... \n", "dedup::0bb4aec1710521c12ee76289d9440817 [re3data_____::eb721a14697a05c477d0ae23830e665... \n", "dedup::139042a4157a773f209847829d80894d [756, re3data_____::a95b34b344dc049963c35997fe... \n", "dedup::13d4bfa0321f86f042b34ec79064b316 [opendoar____::e3844e186e6eb8736e9f53c0c588952... \n", "... ... \n", "dedup::eded0708dfe855304a50029fccf1a677 [opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf... \n", "dedup::ef46a43afd7c7d67e21f4306bb1364e9 [opendoar____::1f74a54f39b3123ad272ca0a06e7463... \n", "dedup::f296bb3903d8a84d81c47e6db90764b9 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::f3dc6512e46961c363ea402ff218c8fb [re3data_____::b2fc675049cbf485d9abbccf5232a31... \n", "dedup::f9aa64cbb57131939eda048250f2dbae [re3data_____::063765fa6d6358d62ea2d41dde32d3a... \n", "\n", " original_id \\\n", "dedup_id \n", "dedup::06138bc5af6023646ede0e1f7c1eac75 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::0b7e684c89e746c67c9761ce2b65479c [r3d100010423, 375, https://fairsharing.org/10... \n", "dedup::0bb4aec1710521c12ee76289d9440817 [r3d100012596, 650, 349] \n", "dedup::139042a4157a773f209847829d80894d [756, r3d100010690, 1330, 5487] \n", "dedup::13d4bfa0321f86f042b34ec79064b316 [9411, 15255, r3d100013135] \n", "... ... \n", "dedup::eded0708dfe855304a50029fccf1a677 [3, r3d100012604, 5509] \n", "dedup::ef46a43afd7c7d67e21f4306bb1364e9 [5870, https://fairsharing.org/10.25504/FAIRsh... \n", "dedup::f296bb3903d8a84d81c47e6db90764b9 [https://fairsharing.org/10.25504/FAIRsharing.... \n", "dedup::f3dc6512e46961c363ea402ff218c8fb [r3d100012538, 10171, https://fairsharing.org/... \n", "dedup::f9aa64cbb57131939eda048250f2dbae [r3d100012692, https://fairsharing.org/10.2550... 
\n", "\n", " name \\\n", "dedup_id \n", "dedup::06138bc5af6023646ede0e1f7c1eac75 [Crystallography Open Database, Crystallograph... \n", "dedup::0b7e684c89e746c67c9761ce2b65479c [Woods Hole Open Access Server, woods hole ope... \n", "dedup::0bb4aec1710521c12ee76289d9440817 [Digitale Bibliothek Thüringen, digitale bibli... \n", "dedup::139042a4157a773f209847829d80894d [Khazar University Institutional Repository, K... \n", "dedup::13d4bfa0321f86f042b34ec79064b316 [fordatis, Fordatis, Fordatis] \n", "... ... \n", "dedup::eded0708dfe855304a50029fccf1a677 [ams acta, AMS Acta, AMS Acta] \n", "dedup::ef46a43afd7c7d67e21f4306bb1364e9 [heidata, heiDATA, heiDATA] \n", "dedup::f296bb3903d8a84d81c47e6db90764b9 [PubChem, PubChem, pubchem] \n", "dedup::f3dc6512e46961c363ea402ff218c8fb [DataverseNO, dataverseno, DataverseNO] \n", "dedup::f9aa64cbb57131939eda048250f2dbae [Scholars' Mine, Scholars' Mine, scholars mine] \n", "\n", " source \\\n", "dedup_id \n", "dedup::06138bc5af6023646ede0e1f7c1eac75 [FAIRsharing, roar, re3data, OpenDOAR] \n", "dedup::0b7e684c89e746c67c9761ce2b65479c [re3data, OpenDOAR, FAIRsharing] \n", "dedup::0bb4aec1710521c12ee76289d9440817 [re3data, OpenDOAR, roar] \n", "dedup::139042a4157a773f209847829d80894d [roar, re3data, OpenDOAR, roar] \n", "dedup::13d4bfa0321f86f042b34ec79064b316 [OpenDOAR, roar, re3data] \n", "... ... 
\n", "dedup::eded0708dfe855304a50029fccf1a677 [OpenDOAR, re3data, roar] \n", "dedup::ef46a43afd7c7d67e21f4306bb1364e9 [OpenDOAR, FAIRsharing, re3data] \n", "dedup::f296bb3903d8a84d81c47e6db90764b9 [FAIRsharing, re3data, OpenDOAR] \n", "dedup::f3dc6512e46961c363ea402ff218c8fb [re3data, OpenDOAR, FAIRsharing] \n", "dedup::f9aa64cbb57131939eda048250f2dbae [re3data, FAIRsharing, OpenDOAR] \n", "\n", " source_set \n", "dedup_id \n", "dedup::06138bc5af6023646ede0e1f7c1eac75 {re3data, OpenDOAR, roar, FAIRsharing} \n", "dedup::0b7e684c89e746c67c9761ce2b65479c {re3data, FAIRsharing, OpenDOAR} \n", "dedup::0bb4aec1710521c12ee76289d9440817 {re3data, roar, OpenDOAR} \n", "dedup::139042a4157a773f209847829d80894d {re3data, roar, OpenDOAR} \n", "dedup::13d4bfa0321f86f042b34ec79064b316 {OpenDOAR, re3data, roar} \n", "... ... \n", "dedup::eded0708dfe855304a50029fccf1a677 {OpenDOAR, roar, re3data} \n", "dedup::ef46a43afd7c7d67e21f4306bb1364e9 {OpenDOAR, re3data, FAIRsharing} \n", "dedup::f296bb3903d8a84d81c47e6db90764b9 {re3data, OpenDOAR, FAIRsharing} \n", "dedup::f3dc6512e46961c363ea402ff218c8fb {re3data, FAIRsharing, OpenDOAR} \n", "dedup::f9aa64cbb57131939eda048250f2dbae {re3data, OpenDOAR, FAIRsharing} \n", "\n", "[65 rows x 5 columns]" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Keep only the dedup groups whose source_set contains at least three entries.\n",
 "dup[dup['source_set'].map(len) >= 3]"
] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0[Harvard University, Institute for Quantitativ...[Ontario Council of University Libraries, [CBU...[Scholars Portal Dataverse, [], CAN, [general]...[The Dataverse Project, [], AAA, [technical], ...[University of Ottawa, Library, [Université d'...
1[Dataverse Project, [], AAA, [technical], non-...[Harvard University, Institute for Quantitave ...[Ontario Council of University Libraries, [CBU...[University of Toronto, Libraries, [], CAN, [g...None
2[Harvard University, Institute for Quantitativ...[Ontario Council of University Libraries, [CBU...[Scholars Portal Dataverse, [dataverse@scholar...[The Dataverse Project, [], AAA, [technical], ...[University of Windsor, [], CAN, [general], no...
3[Harvard University, Institute for Quantitativ...[Ontario Council of University Libraries, [CBU...[Scholars Portal Dataverse, [], CAN, [general]...[The Dataverse Project, [], AAA, [technical], ...[University of Waterloo, [], CAN, [general], n...
4[University of Victoria, [UVic], CAN, [general...[University of Victoria, Libraries, [], CAN, [...NoneNoneNone
5[The University of British Columbia, [], CAN, ...[University of British Columbia, Library, [], ...NoneNoneNone
\n", "
" ], "text/plain": [ " 0 \\\n", "0 [Harvard University, Institute for Quantitativ... \n", "1 [Dataverse Project, [], AAA, [technical], non-... \n", "2 [Harvard University, Institute for Quantitativ... \n", "3 [Harvard University, Institute for Quantitativ... \n", "4 [University of Victoria, [UVic], CAN, [general... \n", "5 [The University of British Columbia, [], CAN, ... \n", "\n", " 1 \\\n", "0 [Ontario Council of University Libraries, [CBU... \n", "1 [Harvard University, Institute for Quantitave ... \n", "2 [Ontario Council of University Libraries, [CBU... \n", "3 [Ontario Council of University Libraries, [CBU... \n", "4 [University of Victoria, Libraries, [], CAN, [... \n", "5 [University of British Columbia, Library, [], ... \n", "\n", " 2 \\\n", "0 [Scholars Portal Dataverse, [], CAN, [general]... \n", "1 [Ontario Council of University Libraries, [CBU... \n", "2 [Scholars Portal Dataverse, [dataverse@scholar... \n", "3 [Scholars Portal Dataverse, [], CAN, [general]... \n", "4 None \n", "5 None \n", "\n", " 3 \\\n", "0 [The Dataverse Project, [], AAA, [technical], ... \n", "1 [University of Toronto, Libraries, [], CAN, [g... \n", "2 [The Dataverse Project, [], AAA, [technical], ... \n", "3 [The Dataverse Project, [], AAA, [technical], ... \n", "4 None \n", "5 None \n", "\n", " 4 \n", "0 [University of Ottawa, Library, [Université d'... \n", "1 None \n", "2 [University of Windsor, [], CAN, [general], no... \n", "3 [University of Waterloo, [], CAN, [general], n... \n", "4 None \n", "5 None " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# One output row per selected re3data record, one column per entry of its `institution` list.\n",
 "pd.DataFrame(\n",
 "    re3data_df.loc[\n",
 "        re3data_df['re3data_id'].isin([\n",
 "            'r3d100013204', 'r3d100013458', 'r3d100012807',\n",
 "            'r3d100012808', 'r3d100012806', 'r3d100012805',\n",
 "        ]),\n",
 "        'institution',\n",
 "    ].to_list()\n",
 ")"
] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eprintidrev_numbereprint_statususeridimportidsourcedirdatestamplastmodstatus_changedtypesucceedscommentarymetadata_visibilitylatitudelongituderelation_typerelation_uriitem_issues_iditem_issues_typeitem_issues_descriptionitem_issues_timestampitem_issues_statusitem_issues_reported_byitem_issues_resolved_byitem_issues_commentitem_issues_countsword_depositorsword_slugexemplarhome_pagetitleoai_pmhsword_endpointrss_feedtwitter_feeddescriptionfulltextopen_accessmandateorganisation_titleorganisation_home_pagelocation_countrylocation_citylocation_latitudelocation_longitudesoftwaregeonameversionsubjectsdatenotesuggestionsactivity_lowactivity_mediumactivity_highrecordcountrecordhistoryfulltexts_totalfulltexts_docsfulltexts_rtotalfulltexts_rdocsregistry_nameregistry_idsubmit_tosubmitted_to_namesubmitted_to_donewebometrics_rankwebometrics_sizewebometrics_visibilitywebometrics_rich_fileswebometrics_scholarmonthly_depositstotal_depositsassociation
4167461228archive1380NaNNaNdisk0/00/00/46/122012-01-08 03:17:022012-04-16 10:53:042012-01-08 03:17:02institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://dspace.library.iitb.ac.in/jspui/IIT Bombay Institutional Repositoryhttp://dspace.library.iitb.ac.in/oai/requestNaNhttp://dspace.library.iitb.ac.in/xmlui/feed/at...NaNNaNTRUETRUETRUEIIT Bombayhttp://www.iitb.ac.ininMumbai19.13372.9166dspacegeoname_2_INotherTA2011-12-15 09:01:35NaNNaN000990,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,9...NaNNaNNaNNaNcelestial4790NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41684612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41694612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTHNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41704612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTJNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41714612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTKNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41724612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
41734612NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTPNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1643646498archive1380NaNNaNdisk0/00/00/46/492012-02-05 13:57:012012-04-16 10:39:582012-02-05 13:57:01institutionalNaNNaNshowNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNhttp://dspace.library.iitb.ac.in/jspui/IIT Bombay Institutional Repositoryhttp://dspace.library.iitb.ac.in/oaiNaNhttp://dspace.library.iitb.ac.in/xmlui/feed/rs...NaNNaNTRUETRUEFALSEIIT Bombayhttp://www.iitb.ac.ininMumbai19.13372.9166dspacegeoname_2_INotherT12012-01-05 12:09:37NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNcelestial4789NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
164374649NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNTANaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " eprintid rev_number eprint_status userid importid source \\\n", "4167 4612 28 archive 1380 NaN NaN \n", "4168 4612 NaN NaN NaN NaN NaN \n", "4169 4612 NaN NaN NaN NaN NaN \n", "4170 4612 NaN NaN NaN NaN NaN \n", "4171 4612 NaN NaN NaN NaN NaN \n", "4172 4612 NaN NaN NaN NaN NaN \n", "4173 4612 NaN NaN NaN NaN NaN \n", "16436 4649 8 archive 1380 NaN NaN \n", "16437 4649 NaN NaN NaN NaN NaN \n", "\n", " dir datestamp lastmod \\\n", "4167 disk0/00/00/46/12 2012-01-08 03:17:02 2012-04-16 10:53:04 \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 disk0/00/00/46/49 2012-02-05 13:57:01 2012-04-16 10:39:58 \n", "16437 NaN NaN NaN \n", "\n", " status_changed type succeeds commentary \\\n", "4167 2012-01-08 03:17:02 institutional NaN NaN \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 2012-02-05 13:57:01 institutional NaN NaN \n", "16437 NaN NaN NaN NaN \n", "\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n", "4167 show NaN NaN NaN NaN \n", "4168 NaN NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN NaN \n", "16436 show NaN NaN NaN NaN \n", "16437 NaN NaN NaN NaN NaN \n", "\n", " item_issues_id item_issues_type item_issues_description \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " item_issues_timestamp item_issues_status item_issues_reported_by \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", 
"16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " item_issues_resolved_by item_issues_comment item_issues_count \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " sword_depositor sword_slug exemplar \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " home_page \\\n", "4167 http://dspace.library.iitb.ac.in/jspui/ \n", "4168 NaN \n", "4169 NaN \n", "4170 NaN \n", "4171 NaN \n", "4172 NaN \n", "4173 NaN \n", "16436 http://dspace.library.iitb.ac.in/jspui/ \n", "16437 NaN \n", "\n", " title \\\n", "4167 IIT Bombay Institutional Repository \n", "4168 NaN \n", "4169 NaN \n", "4170 NaN \n", "4171 NaN \n", "4172 NaN \n", "4173 NaN \n", "16436 IIT Bombay Institutional Repository \n", "16437 NaN \n", "\n", " oai_pmh sword_endpoint \\\n", "4167 http://dspace.library.iitb.ac.in/oai/request NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 http://dspace.library.iitb.ac.in/oai NaN \n", "16437 NaN NaN \n", "\n", " rss_feed twitter_feed \\\n", "4167 http://dspace.library.iitb.ac.in/xmlui/feed/at... NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 http://dspace.library.iitb.ac.in/xmlui/feed/rs... 
NaN \n", "16437 NaN NaN \n", "\n", " description fulltext open_access mandate organisation_title \\\n", "4167 NaN TRUE TRUE TRUE IIT Bombay \n", "4168 NaN NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN NaN \n", "16436 NaN TRUE TRUE FALSE IIT Bombay \n", "16437 NaN NaN NaN NaN NaN \n", "\n", " organisation_home_page location_country location_city location_latitude \\\n", "4167 http://www.iitb.ac.in in Mumbai 19.133 \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 http://www.iitb.ac.in in Mumbai 19.133 \n", "16437 NaN NaN NaN NaN \n", "\n", " location_longitude software geoname version subjects \\\n", "4167 72.9166 dspace geoname_2_IN other TA \n", "4168 NaN NaN NaN NaN TD \n", "4169 NaN NaN NaN NaN TH \n", "4170 NaN NaN NaN NaN TJ \n", "4171 NaN NaN NaN NaN TK \n", "4172 NaN NaN NaN NaN TN \n", "4173 NaN NaN NaN NaN TP \n", "16436 72.9166 dspace geoname_2_IN other T1 \n", "16437 NaN NaN NaN NaN TA \n", "\n", " date note suggestions activity_low activity_medium \\\n", "4167 2011-12-15 09:01:35 NaN NaN 0 0 \n", "4168 NaN NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN NaN \n", "16436 2012-01-05 12:09:37 NaN NaN NaN NaN \n", "16437 NaN NaN NaN NaN NaN \n", "\n", " activity_high recordcount \\\n", "4167 0 99 \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 NaN NaN \n", "16437 NaN NaN \n", "\n", " recordhistory fulltexts_total \\\n", "4167 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,9... 
NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 NaN NaN \n", "16437 NaN NaN \n", "\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n", "4167 NaN NaN NaN celestial \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 NaN NaN NaN celestial \n", "16437 NaN NaN NaN NaN \n", "\n", " registry_id submit_to submitted_to_name submitted_to_done \\\n", "4167 4790 NaN NaN NaN \n", "4168 NaN NaN NaN NaN \n", "4169 NaN NaN NaN NaN \n", "4170 NaN NaN NaN NaN \n", "4171 NaN NaN NaN NaN \n", "4172 NaN NaN NaN NaN \n", "4173 NaN NaN NaN NaN \n", "16436 4789 NaN NaN NaN \n", "16437 NaN NaN NaN NaN \n", "\n", " webometrics_rank webometrics_size webometrics_visibility \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " webometrics_rich_files webometrics_scholar monthly_deposits \\\n", "4167 NaN NaN NaN \n", "4168 NaN NaN NaN \n", "4169 NaN NaN NaN \n", "4170 NaN NaN NaN \n", "4171 NaN NaN NaN \n", "4172 NaN NaN NaN \n", "4173 NaN NaN NaN \n", "16436 NaN NaN NaN \n", "16437 NaN NaN NaN \n", "\n", " total_deposits association \n", "4167 NaN NaN \n", "4168 NaN NaN \n", "4169 NaN NaN \n", "4170 NaN NaN \n", "4171 NaN NaN \n", "4172 NaN NaN \n", "4173 NaN NaN \n", "16436 NaN NaN \n", "16437 NaN NaN " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "roar_df[roar_df.eprintid.isin(['4612', '4649'])]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
openaire_idre3data_idrepository_nameadditional_namerepository_urlrepository_iddescriptiontypesizeupdate_datestart_dateend_datesubjectmission_statementcontent_typeprovider_typekeywordinstitutionpolicydatabase_accessdatabase_licensedata_accessdata_licensedata_uploaddata_upload_licensesoftwareversioningapipid_systemcitation_guideline_urlaid_systemenhanced_publicationquality_managementcertificatemetadata_standardsyndicationremarksentry_datelast_update
1090re3data_____::4af9fe2bb93511a5e0f0c39e94d6557fr3d100011306RESID Database of Protein Modifications[]https://pir.georgetown.edu/resid/resid.shtml[FAIRsharing_doi:10.25504/FAIRsharing.qaszjp, ...The RESID Database of Protein Modifications is...[disciplinary]NaNNaN2014NaN[2 Life Sciences, 201 Basic Biological and Med...false[Images, Structured text][dataProvider][genomes, life sciences, proteins, proteomes, ...[[Georgetown University, Medical Center, [GUMC...truetruefalsetruetruetruefalsetrueyestruetruetruetrueyesunknownfalsefalsefalseRESID is covered by Thomson Reuters Data Citat...2014-12-052019-01-17
\n", "
" ], "text/plain": [ " openaire_id re3data_id \\\n", "1090 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f r3d100011306 \n", "\n", " repository_name additional_name \\\n", "1090 RESID Database of Protein Modifications [] \n", "\n", " repository_url \\\n", "1090 https://pir.georgetown.edu/resid/resid.shtml \n", "\n", " repository_id \\\n", "1090 [FAIRsharing_doi:10.25504/FAIRsharing.qaszjp, ... \n", "\n", " description type size \\\n", "1090 The RESID Database of Protein Modifications is... [disciplinary] NaN \n", "\n", " update_date start_date end_date \\\n", "1090 NaN 2014 NaN \n", "\n", " subject mission_statement \\\n", "1090 [2 Life Sciences, 201 Basic Biological and Med... false \n", "\n", " content_type provider_type \\\n", "1090 [Images, Structured text] [dataProvider] \n", "\n", " keyword \\\n", "1090 [genomes, life sciences, proteins, proteomes, ... \n", "\n", " institution policy \\\n", "1090 [[Georgetown University, Medical Center, [GUMC... true \n", "\n", " database_access database_license data_access data_license data_upload \\\n", "1090 true false true true true \n", "\n", " data_upload_license software versioning api pid_system \\\n", "1090 false true yes true true \n", "\n", " citation_guideline_url aid_system enhanced_publication \\\n", "1090 true true yes \n", "\n", " quality_management certificate metadata_standard syndication \\\n", "1090 unknown false false false \n", "\n", " remarks entry_date \\\n", "1090 RESID is covered by Thomson Reuters Data Citat... 
2014-12-05 \n", "\n", " last_update \n", "1090 2019-01-17 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df[re3data_df.re3data_id == 'r3d100011306']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }