new dedup file
This commit is contained in:
parent
7ab83cbb10
commit
6abcd9b142
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -2,7 +2,7 @@
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 41,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -26,7 +26,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -59,84 +59,36 @@
|
||||||
" </thead>\n",
|
" </thead>\n",
|
||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>0</th>\n",
|
" <th>count</th>\n",
|
||||||
" <td>dedup::29a83a8a9641bb860a679d7e5ba52d26</td>\n",
|
" <td>4513</td>\n",
|
||||||
" <td>14174</td>\n",
|
" <td>4513</td>\n",
|
||||||
" <td>14174</td>\n",
|
" <td>4513</td>\n",
|
||||||
" <td>OHIO Open Library | Ohio University Research</td>\n",
|
" <td>4513</td>\n",
|
||||||
|
" <td>4513</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>unique</th>\n",
|
||||||
|
" <td>2145</td>\n",
|
||||||
|
" <td>4513</td>\n",
|
||||||
|
" <td>4261</td>\n",
|
||||||
|
" <td>3894</td>\n",
|
||||||
|
" <td>4</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>top</th>\n",
|
||||||
|
" <td>dedup::75e33da9b103b7b91dcd8da0abe1354b</td>\n",
|
||||||
|
" <td>https://fairsharing.org/bsg-d001520</td>\n",
|
||||||
|
" <td>3860</td>\n",
|
||||||
|
" <td>UPN JATIM REPOSITORY</td>\n",
|
||||||
" <td>roar</td>\n",
|
" <td>roar</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>freq</th>\n",
|
||||||
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
|
" <td>5</td>\n",
|
||||||
" <td>4612</td>\n",
|
" <td>1</td>\n",
|
||||||
" <td>4612</td>\n",
|
" <td>2</td>\n",
|
||||||
" <td>IIT Bombay Institutional Repository</td>\n",
|
" <td>4</td>\n",
|
||||||
" <td>roar</td>\n",
|
" <td>1933</td>\n",
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>2</th>\n",
|
|
||||||
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
|
|
||||||
" <td>4649</td>\n",
|
|
||||||
" <td>4649</td>\n",
|
|
||||||
" <td>IIT Bombay Institutional Repository</td>\n",
|
|
||||||
" <td>roar</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>3</th>\n",
|
|
||||||
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
|
|
||||||
" <td>re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f</td>\n",
|
|
||||||
" <td>r3d100011306</td>\n",
|
|
||||||
" <td>RESID Database of Protein Modifications</td>\n",
|
|
||||||
" <td>re3data</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>4</th>\n",
|
|
||||||
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
|
|
||||||
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
|
|
||||||
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
|
|
||||||
" <td>RESID Database of Protein Modifications</td>\n",
|
|
||||||
" <td>FAIRsharing</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>5</th>\n",
|
|
||||||
" <td>dedup::0023a1e3447fdb31836536cc903f1310</td>\n",
|
|
||||||
" <td>opendoar____::c6f798b844366ccd65d99bc7f31e0e02</td>\n",
|
|
||||||
" <td>3410</td>\n",
|
|
||||||
" <td>erucu: electronic repository of the ukrainian ...</td>\n",
|
|
||||||
" <td>OpenDOAR</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>6</th>\n",
|
|
||||||
" <td>dedup::0023a1e3447fdb31836536cc903f1310</td>\n",
|
|
||||||
" <td>10013</td>\n",
|
|
||||||
" <td>10013</td>\n",
|
|
||||||
" <td>ErUCU: Electronic repository of the Ukrainian ...</td>\n",
|
|
||||||
" <td>roar</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>7</th>\n",
|
|
||||||
" <td>dedup::003ab6b40af9b488decea7c582d150a2</td>\n",
|
|
||||||
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
|
|
||||||
" <td>https://fairsharing.org/10.25504/FAIRsharing.d...</td>\n",
|
|
||||||
" <td>Synapse</td>\n",
|
|
||||||
" <td>FAIRsharing</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>8</th>\n",
|
|
||||||
" <td>dedup::003ab6b40af9b488decea7c582d150a2</td>\n",
|
|
||||||
" <td>re3data_____::cafc5d99b7c187e24b40d958a16a91f1</td>\n",
|
|
||||||
" <td>r3d100011894</td>\n",
|
|
||||||
" <td>Synapse</td>\n",
|
|
||||||
" <td>re3data</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>9</th>\n",
|
|
||||||
" <td>dedup::0064f599ed0adb5870a5b3ffe438e485</td>\n",
|
|
||||||
" <td>16034</td>\n",
|
|
||||||
" <td>16034</td>\n",
|
|
||||||
" <td>Giresun University Institutional Repository</td>\n",
|
|
||||||
" <td>roar</td>\n",
|
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
|
@ -144,210 +96,37 @@
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" dedup_id \\\n",
|
" dedup_id \\\n",
|
||||||
"0 dedup::29a83a8a9641bb860a679d7e5ba52d26 \n",
|
"count 4513 \n",
|
||||||
"1 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
|
"unique 2145 \n",
|
||||||
"2 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
|
"top dedup::75e33da9b103b7b91dcd8da0abe1354b \n",
|
||||||
"3 dedup::001e6d882e54c780ce269d3c46997287 \n",
|
"freq 5 \n",
|
||||||
"4 dedup::001e6d882e54c780ce269d3c46997287 \n",
|
|
||||||
"5 dedup::0023a1e3447fdb31836536cc903f1310 \n",
|
|
||||||
"6 dedup::0023a1e3447fdb31836536cc903f1310 \n",
|
|
||||||
"7 dedup::003ab6b40af9b488decea7c582d150a2 \n",
|
|
||||||
"8 dedup::003ab6b40af9b488decea7c582d150a2 \n",
|
|
||||||
"9 dedup::0064f599ed0adb5870a5b3ffe438e485 \n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" duplicate_id \\\n",
|
" duplicate_id original_id name \\\n",
|
||||||
"0 14174 \n",
|
"count 4513 4513 4513 \n",
|
||||||
"1 4612 \n",
|
"unique 4513 4261 3894 \n",
|
||||||
"2 4649 \n",
|
"top https://fairsharing.org/bsg-d001520 3860 UPN JATIM REPOSITORY \n",
|
||||||
"3 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f \n",
|
"freq 1 2 4 \n",
|
||||||
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
|
|
||||||
"5 opendoar____::c6f798b844366ccd65d99bc7f31e0e02 \n",
|
|
||||||
"6 10013 \n",
|
|
||||||
"7 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
|
|
||||||
"8 re3data_____::cafc5d99b7c187e24b40d958a16a91f1 \n",
|
|
||||||
"9 16034 \n",
|
|
||||||
"\n",
|
"\n",
|
||||||
" original_id \\\n",
|
" source \n",
|
||||||
"0 14174 \n",
|
"count 4513 \n",
|
||||||
"1 4612 \n",
|
"unique 4 \n",
|
||||||
"2 4649 \n",
|
"top roar \n",
|
||||||
"3 r3d100011306 \n",
|
"freq 1933 "
|
||||||
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
|
|
||||||
"5 3410 \n",
|
|
||||||
"6 10013 \n",
|
|
||||||
"7 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
|
|
||||||
"8 r3d100011894 \n",
|
|
||||||
"9 16034 \n",
|
|
||||||
"\n",
|
|
||||||
" name source \n",
|
|
||||||
"0 OHIO Open Library | Ohio University Research roar \n",
|
|
||||||
"1 IIT Bombay Institutional Repository roar \n",
|
|
||||||
"2 IIT Bombay Institutional Repository roar \n",
|
|
||||||
"3 RESID Database of Protein Modifications re3data \n",
|
|
||||||
"4 RESID Database of Protein Modifications FAIRsharing \n",
|
|
||||||
"5 erucu: electronic repository of the ukrainian ... OpenDOAR \n",
|
|
||||||
"6 ErUCU: Electronic repository of the Ukrainian ... roar \n",
|
|
||||||
"7 Synapse FAIRsharing \n",
|
|
||||||
"8 Synapse re3data \n",
|
|
||||||
"9 Giresun University Institutional Repository roar "
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 46,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"df_09 = pd.read_csv('../data/interim/ds_dedup09.csv', sep=';')\n",
|
"df_09 = pd.read_csv('../data/interim/ds_dedup09.csv', sep=';', quotechar='\"', header=None, names=['dedup_id', 'duplicate_id', 'original_id', 'name', 'source'])\n",
|
||||||
"df_09.head(10)"
|
"df_09.describe()"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>dedup_id</th>\n",
|
|
||||||
" <th>duplicate_id</th>\n",
|
|
||||||
" <th>original_id</th>\n",
|
|
||||||
" <th>name</th>\n",
|
|
||||||
" <th>source</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>5115</th>\n",
|
|
||||||
" <td>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</td>\n",
|
|
||||||
" <td>1126</td>\n",
|
|
||||||
" <td>1126</td>\n",
|
|
||||||
" <td>RIT Digital Media Library</td>\n",
|
|
||||||
" <td>roar</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>5116</th>\n",
|
|
||||||
" <td>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</td>\n",
|
|
||||||
" <td>opendoar____::443cb001c138b2561a0d90720d6ce111</td>\n",
|
|
||||||
" <td>648</td>\n",
|
|
||||||
" <td>rit digital media library</td>\n",
|
|
||||||
" <td>OpenDOAR</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" dedup_id \\\n",
|
|
||||||
"5115 dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 \n",
|
|
||||||
"5116 dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 \n",
|
|
||||||
"\n",
|
|
||||||
" duplicate_id original_id \\\n",
|
|
||||||
"5115 1126 1126 \n",
|
|
||||||
"5116 opendoar____::443cb001c138b2561a0d90720d6ce111 648 \n",
|
|
||||||
"\n",
|
|
||||||
" name source \n",
|
|
||||||
"5115 RIT Digital Media Library roar \n",
|
|
||||||
"5116 rit digital media library OpenDOAR "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 24,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df_09[df_09.dedup_id == 'dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 25,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>dedup_id</th>\n",
|
|
||||||
" <th>duplicate_id</th>\n",
|
|
||||||
" <th>original_id</th>\n",
|
|
||||||
" <th>name</th>\n",
|
|
||||||
" <th>source</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>40</th>\n",
|
|
||||||
" <td>dedup::01846ae470651e97d2f73fce979406a9</td>\n",
|
|
||||||
" <td>opendoar____::b4d6f2b565ca0eef1f9245403aac366a</td>\n",
|
|
||||||
" <td>7668</td>\n",
|
|
||||||
" <td>digital commons at michigan state university c...</td>\n",
|
|
||||||
" <td>OpenDOAR</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
|
||||||
" dedup_id \\\n",
|
|
||||||
"40 dedup::01846ae470651e97d2f73fce979406a9 \n",
|
|
||||||
"\n",
|
|
||||||
" duplicate_id original_id \\\n",
|
|
||||||
"40 opendoar____::b4d6f2b565ca0eef1f9245403aac366a 7668 \n",
|
|
||||||
"\n",
|
|
||||||
" name source \n",
|
|
||||||
"40 digital commons at michigan state university c... OpenDOAR "
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 25,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"df_09[df_09.dedup_id == 'dedup::01846ae470651e97d2f73fce979406a9']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -365,7 +144,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -380,7 +159,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -389,7 +168,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 16,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
@ -400,7 +179,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -434,43 +213,43 @@
|
||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>0</th>\n",
|
" <th>0</th>\n",
|
||||||
" <td>dedup::29a83a8a9641bb860a679d7e5ba52d26</td>\n",
|
" <td>dedup::252773ebafcbbac75238b419d964068e</td>\n",
|
||||||
" <td>14174</td>\n",
|
" <td>https://fairsharing.org/bsg-d001520</td>\n",
|
||||||
" <td>14174</td>\n",
|
" <td>https://fairsharing.org/bsg-d001520</td>\n",
|
||||||
" <td>OHIO Open Library | Ohio University Research</td>\n",
|
" <td>ACTRIS Data Centre</td>\n",
|
||||||
" <td>roar</td>\n",
|
" <td>FAIRsharing</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
|
" <td>dedup::860320be12a1c050cd7731794e231bd3</td>\n",
|
||||||
" <td>4612</td>\n",
|
" <td>opendoar____::2290a7385ed77cc5592dc2153229f082</td>\n",
|
||||||
" <td>4612</td>\n",
|
" <td>1064</td>\n",
|
||||||
" <td>IIT Bombay Institutional Repository</td>\n",
|
" <td>oxford university research archive</td>\n",
|
||||||
" <td>roar</td>\n",
|
" <td>OpenDOAR</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>2</th>\n",
|
" <th>2</th>\n",
|
||||||
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
|
" <td>dedup::1aa7a8773e6a7fdacbcedf9999009a38</td>\n",
|
||||||
" <td>4649</td>\n",
|
" <td>opendoar____::191f8f858acda435ae0daf994e2a72c2</td>\n",
|
||||||
" <td>4649</td>\n",
|
" <td>8648</td>\n",
|
||||||
" <td>IIT Bombay Institutional Repository</td>\n",
|
" <td>digital commons@georgia southern</td>\n",
|
||||||
" <td>roar</td>\n",
|
" <td>OpenDOAR</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>3</th>\n",
|
" <th>3</th>\n",
|
||||||
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
|
" <td>dedup::4801441f041958afaca324c43c40787b</td>\n",
|
||||||
" <td>re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f</td>\n",
|
" <td>10453</td>\n",
|
||||||
" <td>r3d100011306</td>\n",
|
" <td>10453</td>\n",
|
||||||
" <td>RESID Database of Protein Modifications</td>\n",
|
" <td>MCStor</td>\n",
|
||||||
" <td>re3data</td>\n",
|
" <td>roar</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>4</th>\n",
|
" <th>4</th>\n",
|
||||||
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
|
" <td>dedup::2841194266115ac1cc04d19630cde46b</td>\n",
|
||||||
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
|
" <td>re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5</td>\n",
|
||||||
" <td>https://fairsharing.org/10.25504/FAIRsharing.q...</td>\n",
|
" <td>r3d100011189</td>\n",
|
||||||
" <td>RESID Database of Protein Modifications</td>\n",
|
" <td>PRISM: University of Calgary's Digital Repository</td>\n",
|
||||||
" <td>FAIRsharing</td>\n",
|
" <td>re3data</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
|
@ -478,35 +257,35 @@
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" dedup_id \\\n",
|
" dedup_id \\\n",
|
||||||
"0 dedup::29a83a8a9641bb860a679d7e5ba52d26 \n",
|
"0 dedup::252773ebafcbbac75238b419d964068e \n",
|
||||||
"1 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
|
"1 dedup::860320be12a1c050cd7731794e231bd3 \n",
|
||||||
"2 dedup::000871c1fc726f0b52dc86a4eeb027de \n",
|
"2 dedup::1aa7a8773e6a7fdacbcedf9999009a38 \n",
|
||||||
"3 dedup::001e6d882e54c780ce269d3c46997287 \n",
|
"3 dedup::4801441f041958afaca324c43c40787b \n",
|
||||||
"4 dedup::001e6d882e54c780ce269d3c46997287 \n",
|
"4 dedup::2841194266115ac1cc04d19630cde46b \n",
|
||||||
"\n",
|
"\n",
|
||||||
" duplicate_id \\\n",
|
" duplicate_id \\\n",
|
||||||
"0 14174 \n",
|
"0 https://fairsharing.org/bsg-d001520 \n",
|
||||||
"1 4612 \n",
|
"1 opendoar____::2290a7385ed77cc5592dc2153229f082 \n",
|
||||||
"2 4649 \n",
|
"2 opendoar____::191f8f858acda435ae0daf994e2a72c2 \n",
|
||||||
"3 re3data_____::4af9fe2bb93511a5e0f0c39e94d6557f \n",
|
"3 10453 \n",
|
||||||
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
|
"4 re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5 \n",
|
||||||
"\n",
|
"\n",
|
||||||
" original_id \\\n",
|
" original_id \\\n",
|
||||||
"0 14174 \n",
|
"0 https://fairsharing.org/bsg-d001520 \n",
|
||||||
"1 4612 \n",
|
"1 1064 \n",
|
||||||
"2 4649 \n",
|
"2 8648 \n",
|
||||||
"3 r3d100011306 \n",
|
"3 10453 \n",
|
||||||
"4 https://fairsharing.org/10.25504/FAIRsharing.q... \n",
|
"4 r3d100011189 \n",
|
||||||
"\n",
|
"\n",
|
||||||
" name source \n",
|
" name source \n",
|
||||||
"0 OHIO Open Library | Ohio University Research roar \n",
|
"0 ACTRIS Data Centre FAIRsharing \n",
|
||||||
"1 IIT Bombay Institutional Repository roar \n",
|
"1 oxford university research archive OpenDOAR \n",
|
||||||
"2 IIT Bombay Institutional Repository roar \n",
|
"2 digital commons@georgia southern OpenDOAR \n",
|
||||||
"3 RESID Database of Protein Modifications re3data \n",
|
"3 MCStor roar \n",
|
||||||
"4 RESID Database of Protein Modifications FAIRsharing "
|
"4 PRISM: University of Calgary's Digital Repository re3data "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 7,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
@ -517,7 +296,7 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 44,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -545,6 +324,7 @@
|
||||||
" <th>original_id</th>\n",
|
" <th>original_id</th>\n",
|
||||||
" <th>name</th>\n",
|
" <th>name</th>\n",
|
||||||
" <th>source</th>\n",
|
" <th>source</th>\n",
|
||||||
|
" <th>source_set</th>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup_id</th>\n",
|
" <th>dedup_id</th>\n",
|
||||||
|
@ -552,6 +332,7 @@
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </thead>\n",
|
" </thead>\n",
|
||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
|
@ -561,6 +342,7 @@
|
||||||
" <td>[4612, 4649]</td>\n",
|
" <td>[4612, 4649]</td>\n",
|
||||||
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
|
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
|
||||||
" <td>[roar, roar]</td>\n",
|
" <td>[roar, roar]</td>\n",
|
||||||
|
" <td>{roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::001e6d882e54c780ce269d3c46997287</th>\n",
|
" <th>dedup::001e6d882e54c780ce269d3c46997287</th>\n",
|
||||||
|
@ -568,6 +350,7 @@
|
||||||
" <td>[r3d100011306, https://fairsharing.org/10.2550...</td>\n",
|
" <td>[r3d100011306, https://fairsharing.org/10.2550...</td>\n",
|
||||||
" <td>[RESID Database of Protein Modifications, RESI...</td>\n",
|
" <td>[RESID Database of Protein Modifications, RESI...</td>\n",
|
||||||
" <td>[re3data, FAIRsharing]</td>\n",
|
" <td>[re3data, FAIRsharing]</td>\n",
|
||||||
|
" <td>{re3data, FAIRsharing}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::0023a1e3447fdb31836536cc903f1310</th>\n",
|
" <th>dedup::0023a1e3447fdb31836536cc903f1310</th>\n",
|
||||||
|
@ -575,6 +358,7 @@
|
||||||
" <td>[3410, 10013]</td>\n",
|
" <td>[3410, 10013]</td>\n",
|
||||||
" <td>[erucu: electronic repository of the ukrainian...</td>\n",
|
" <td>[erucu: electronic repository of the ukrainian...</td>\n",
|
||||||
" <td>[OpenDOAR, roar]</td>\n",
|
" <td>[OpenDOAR, roar]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::003ab6b40af9b488decea7c582d150a2</th>\n",
|
" <th>dedup::003ab6b40af9b488decea7c582d150a2</th>\n",
|
||||||
|
@ -582,6 +366,7 @@
|
||||||
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
||||||
" <td>[Synapse, Synapse]</td>\n",
|
" <td>[Synapse, Synapse]</td>\n",
|
||||||
" <td>[FAIRsharing, re3data]</td>\n",
|
" <td>[FAIRsharing, re3data]</td>\n",
|
||||||
|
" <td>{re3data, FAIRsharing}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::0064f599ed0adb5870a5b3ffe438e485</th>\n",
|
" <th>dedup::0064f599ed0adb5870a5b3ffe438e485</th>\n",
|
||||||
|
@ -589,6 +374,7 @@
|
||||||
" <td>[16034, 9647]</td>\n",
|
" <td>[16034, 9647]</td>\n",
|
||||||
" <td>[Giresun University Institutional Repository, ...</td>\n",
|
" <td>[Giresun University Institutional Repository, ...</td>\n",
|
||||||
" <td>[roar, OpenDOAR]</td>\n",
|
" <td>[roar, OpenDOAR]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>...</th>\n",
|
" <th>...</th>\n",
|
||||||
|
@ -596,6 +382,7 @@
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::ff49cc40a8890e6a60f40ff3026d2730</th>\n",
|
" <th>dedup::ff49cc40a8890e6a60f40ff3026d2730</th>\n",
|
||||||
|
@ -603,6 +390,7 @@
|
||||||
" <td>[1333, 1389]</td>\n",
|
" <td>[1333, 1389]</td>\n",
|
||||||
" <td>[UnissResearch, unissresearch]</td>\n",
|
" <td>[UnissResearch, unissresearch]</td>\n",
|
||||||
" <td>[roar, OpenDOAR]</td>\n",
|
" <td>[roar, OpenDOAR]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::ff4d70de478038c72282b7e4af1d4260</th>\n",
|
" <th>dedup::ff4d70de478038c72282b7e4af1d4260</th>\n",
|
||||||
|
@ -610,6 +398,7 @@
|
||||||
" <td>[9752, 16367]</td>\n",
|
" <td>[9752, 16367]</td>\n",
|
||||||
" <td>[european xfel publication database, European ...</td>\n",
|
" <td>[european xfel publication database, European ...</td>\n",
|
||||||
" <td>[OpenDOAR, roar]</td>\n",
|
" <td>[OpenDOAR, roar]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::ff826ce6ee85809389f18a5fafe72366</th>\n",
|
" <th>dedup::ff826ce6ee85809389f18a5fafe72366</th>\n",
|
||||||
|
@ -617,6 +406,7 @@
|
||||||
" <td>[3601, 2608]</td>\n",
|
" <td>[3601, 2608]</td>\n",
|
||||||
" <td>[electronic odessa national economic universit...</td>\n",
|
" <td>[electronic odessa national economic universit...</td>\n",
|
||||||
" <td>[OpenDOAR, OpenDOAR]</td>\n",
|
" <td>[OpenDOAR, OpenDOAR]</td>\n",
|
||||||
|
" <td>{OpenDOAR}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::ffbd6cbb019a1413183c8d08f2929307</th>\n",
|
" <th>dedup::ffbd6cbb019a1413183c8d08f2929307</th>\n",
|
||||||
|
@ -624,6 +414,7 @@
|
||||||
" <td>[3108, 1912]</td>\n",
|
" <td>[3108, 1912]</td>\n",
|
||||||
" <td>[Fotografía Sobre España en el Siglo XIX, foto...</td>\n",
|
" <td>[Fotografía Sobre España en el Siglo XIX, foto...</td>\n",
|
||||||
" <td>[roar, OpenDOAR]</td>\n",
|
" <td>[roar, OpenDOAR]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</th>\n",
|
" <th>dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98</th>\n",
|
||||||
|
@ -631,10 +422,11 @@
|
||||||
" <td>[1126, 648]</td>\n",
|
" <td>[1126, 648]</td>\n",
|
||||||
" <td>[RIT Digital Media Library, rit digital media ...</td>\n",
|
" <td>[RIT Digital Media Library, rit digital media ...</td>\n",
|
||||||
" <td>[roar, OpenDOAR]</td>\n",
|
" <td>[roar, OpenDOAR]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
"<p>2453 rows × 4 columns</p>\n",
|
"<p>2145 rows × 5 columns</p>\n",
|
||||||
"</div>"
|
"</div>"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
|
@ -680,7 +472,7 @@
|
||||||
"dedup::ffbd6cbb019a1413183c8d08f2929307 [Fotografía Sobre España en el Siglo XIX, foto... \n",
|
"dedup::ffbd6cbb019a1413183c8d08f2929307 [Fotografía Sobre España en el Siglo XIX, foto... \n",
|
||||||
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [RIT Digital Media Library, rit digital media ... \n",
|
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [RIT Digital Media Library, rit digital media ... \n",
|
||||||
"\n",
|
"\n",
|
||||||
" source \n",
|
" source \\\n",
|
||||||
"dedup_id \n",
|
"dedup_id \n",
|
||||||
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n",
|
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n",
|
||||||
"dedup::001e6d882e54c780ce269d3c46997287 [re3data, FAIRsharing] \n",
|
"dedup::001e6d882e54c780ce269d3c46997287 [re3data, FAIRsharing] \n",
|
||||||
|
@ -694,22 +486,37 @@
|
||||||
"dedup::ffbd6cbb019a1413183c8d08f2929307 [roar, OpenDOAR] \n",
|
"dedup::ffbd6cbb019a1413183c8d08f2929307 [roar, OpenDOAR] \n",
|
||||||
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [roar, OpenDOAR] \n",
|
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 [roar, OpenDOAR] \n",
|
||||||
"\n",
|
"\n",
|
||||||
"[2453 rows x 4 columns]"
|
" source_set \n",
|
||||||
|
"dedup_id \n",
|
||||||
|
"dedup::000871c1fc726f0b52dc86a4eeb027de {roar} \n",
|
||||||
|
"dedup::001e6d882e54c780ce269d3c46997287 {re3data, FAIRsharing} \n",
|
||||||
|
"dedup::0023a1e3447fdb31836536cc903f1310 {OpenDOAR, roar} \n",
|
||||||
|
"dedup::003ab6b40af9b488decea7c582d150a2 {re3data, FAIRsharing} \n",
|
||||||
|
"dedup::0064f599ed0adb5870a5b3ffe438e485 {OpenDOAR, roar} \n",
|
||||||
|
"... ... \n",
|
||||||
|
"dedup::ff49cc40a8890e6a60f40ff3026d2730 {OpenDOAR, roar} \n",
|
||||||
|
"dedup::ff4d70de478038c72282b7e4af1d4260 {OpenDOAR, roar} \n",
|
||||||
|
"dedup::ff826ce6ee85809389f18a5fafe72366 {OpenDOAR} \n",
|
||||||
|
"dedup::ffbd6cbb019a1413183c8d08f2929307 {OpenDOAR, roar} \n",
|
||||||
|
"dedup::ffeed84c7cb1ae7bf4ec4bd78275bb98 {OpenDOAR, roar} \n",
|
||||||
|
"\n",
|
||||||
|
"[2145 rows x 5 columns]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 8,
|
"execution_count": 44,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"dup = df_09.groupby('dedup_id').aggregate(list)\n",
|
"dup = df_09.groupby('dedup_id').aggregate(list)\n",
|
||||||
|
"dup['source_set'] = dup.source.map(set)\n",
|
||||||
"dup"
|
"dup"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 45,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -737,6 +544,7 @@
|
||||||
" <th>original_id</th>\n",
|
" <th>original_id</th>\n",
|
||||||
" <th>name</th>\n",
|
" <th>name</th>\n",
|
||||||
" <th>source</th>\n",
|
" <th>source</th>\n",
|
||||||
|
" <th>source_set</th>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup_id</th>\n",
|
" <th>dedup_id</th>\n",
|
||||||
|
@ -744,43 +552,49 @@
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
" <th></th>\n",
|
" <th></th>\n",
|
||||||
|
" <th></th>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </thead>\n",
|
" </thead>\n",
|
||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::01846ae470651e97d2f73fce979406a9</th>\n",
|
" <th>dedup::06138bc5af6023646ede0e1f7c1eac75</th>\n",
|
||||||
" <td>[opendoar____::b4d6f2b565ca0eef1f9245403aac366a]</td>\n",
|
|
||||||
" <td>[7668]</td>\n",
|
|
||||||
" <td>[digital commons at michigan state university ...</td>\n",
|
|
||||||
" <td>[OpenDOAR]</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>dedup::022036087426786cfd0f7f41fa7a2665</th>\n",
|
|
||||||
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
||||||
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
||||||
" <td>[World Data Center for Climate at DRKZ]</td>\n",
|
" <td>[Crystallography Open Database, Crystallograph...</td>\n",
|
||||||
" <td>[FAIRsharing]</td>\n",
|
" <td>[FAIRsharing, roar, re3data, OpenDOAR]</td>\n",
|
||||||
|
" <td>{re3data, OpenDOAR, roar, FAIRsharing}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::07e8b472e1e4af17a6b20ce083baf29f</th>\n",
|
" <th>dedup::0b7e684c89e746c67c9761ce2b65479c</th>\n",
|
||||||
" <td>[15036]</td>\n",
|
" <td>[re3data_____::44217da669f17a260c0958a679003a7...</td>\n",
|
||||||
" <td>[15036]</td>\n",
|
" <td>[r3d100010423, 375, https://fairsharing.org/10...</td>\n",
|
||||||
" <td>[MiCISAN]</td>\n",
|
" <td>[Woods Hole Open Access Server, woods hole ope...</td>\n",
|
||||||
" <td>[roar]</td>\n",
|
" <td>[re3data, OpenDOAR, FAIRsharing]</td>\n",
|
||||||
|
" <td>{re3data, FAIRsharing, OpenDOAR}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::0894634a3244e3050d8057a453e17e57</th>\n",
|
" <th>dedup::0bb4aec1710521c12ee76289d9440817</th>\n",
|
||||||
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
" <td>[re3data_____::eb721a14697a05c477d0ae23830e665...</td>\n",
|
||||||
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
" <td>[r3d100012596, 650, 349]</td>\n",
|
||||||
" <td>[European Variation Archive]</td>\n",
|
" <td>[Digitale Bibliothek Thüringen, digitale bibli...</td>\n",
|
||||||
" <td>[FAIRsharing]</td>\n",
|
" <td>[re3data, OpenDOAR, roar]</td>\n",
|
||||||
|
" <td>{re3data, roar, OpenDOAR}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::0a54b19a13b6712dc04d1b49215423d8</th>\n",
|
" <th>dedup::139042a4157a773f209847829d80894d</th>\n",
|
||||||
" <td>[opendoar____::d34ab169b70c9dcd35e62896010cd9ff]</td>\n",
|
" <td>[756, re3data_____::a95b34b344dc049963c35997fe...</td>\n",
|
||||||
" <td>[377]</td>\n",
|
" <td>[756, r3d100010690, 1330, 5487]</td>\n",
|
||||||
" <td>[yale medicine thesis digital library]</td>\n",
|
" <td>[Khazar University Institutional Repository, K...</td>\n",
|
||||||
" <td>[OpenDOAR]</td>\n",
|
" <td>[roar, re3data, OpenDOAR, roar]</td>\n",
|
||||||
|
" <td>{re3data, roar, OpenDOAR}</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>dedup::13d4bfa0321f86f042b34ec79064b316</th>\n",
|
||||||
|
" <td>[opendoar____::e3844e186e6eb8736e9f53c0c588952...</td>\n",
|
||||||
|
" <td>[9411, 15255, r3d100013135]</td>\n",
|
||||||
|
" <td>[fordatis, Fordatis, Fordatis]</td>\n",
|
||||||
|
" <td>[OpenDOAR, roar, re3data]</td>\n",
|
||||||
|
" <td>{OpenDOAR, re3data, roar}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>...</th>\n",
|
" <th>...</th>\n",
|
||||||
|
@ -788,143 +602,139 @@
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
" <td>...</td>\n",
|
" <td>...</td>\n",
|
||||||
|
" <td>...</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::f8306c8f16096b6d944799f4d427a976</th>\n",
|
" <th>dedup::eded0708dfe855304a50029fccf1a677</th>\n",
|
||||||
" <td>[re3data_____::574b553c6c374d597d2068ab2b117889]</td>\n",
|
" <td>[opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf...</td>\n",
|
||||||
" <td>[r3d100012041]</td>\n",
|
" <td>[3, r3d100012604, 5509]</td>\n",
|
||||||
" <td>[Canadian Disaster Database]</td>\n",
|
" <td>[ams acta, AMS Acta, AMS Acta]</td>\n",
|
||||||
" <td>[re3data]</td>\n",
|
" <td>[OpenDOAR, re3data, roar]</td>\n",
|
||||||
|
" <td>{OpenDOAR, roar, re3data}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::f9d8e2daaa9144310b66bf948e50d656</th>\n",
|
" <th>dedup::ef46a43afd7c7d67e21f4306bb1364e9</th>\n",
|
||||||
" <td>[re3data_____::95014789f83d7611ebfddace19d0523a]</td>\n",
|
" <td>[opendoar____::1f74a54f39b3123ad272ca0a06e7463...</td>\n",
|
||||||
" <td>[r3d100011045]</td>\n",
|
" <td>[5870, https://fairsharing.org/10.25504/FAIRsh...</td>\n",
|
||||||
" <td>[Index to Marine & Lacustrine Geological Samples]</td>\n",
|
" <td>[heidata, heiDATA, heiDATA]</td>\n",
|
||||||
" <td>[re3data]</td>\n",
|
" <td>[OpenDOAR, FAIRsharing, re3data]</td>\n",
|
||||||
|
" <td>{OpenDOAR, re3data, FAIRsharing}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::fcdbc4f504a15df8f78da88ee72fad32</th>\n",
|
" <th>dedup::f296bb3903d8a84d81c47e6db90764b9</th>\n",
|
||||||
" <td>[opendoar____::9f96f36b7aae3b1ff847c26ac94c604e]</td>\n",
|
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
||||||
" <td>[4979]</td>\n",
|
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
|
||||||
" <td>[university of minnesota law school]</td>\n",
|
" <td>[PubChem, PubChem, pubchem]</td>\n",
|
||||||
" <td>[OpenDOAR]</td>\n",
|
" <td>[FAIRsharing, re3data, OpenDOAR]</td>\n",
|
||||||
|
" <td>{re3data, OpenDOAR, FAIRsharing}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::fcfe9c770eb9372e6961a17f7eaffd5f</th>\n",
|
" <th>dedup::f3dc6512e46961c363ea402ff218c8fb</th>\n",
|
||||||
" <td>[4637]</td>\n",
|
" <td>[re3data_____::b2fc675049cbf485d9abbccf5232a31...</td>\n",
|
||||||
" <td>[4637]</td>\n",
|
" <td>[r3d100012538, 10171, https://fairsharing.org/...</td>\n",
|
||||||
" <td>[Simon Fraser University Institutional Reposit...</td>\n",
|
" <td>[DataverseNO, dataverseno, DataverseNO]</td>\n",
|
||||||
" <td>[roar]</td>\n",
|
" <td>[re3data, OpenDOAR, FAIRsharing]</td>\n",
|
||||||
|
" <td>{re3data, FAIRsharing, OpenDOAR}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>dedup::fe73f687e5bc5280214e0486b273a5f9</th>\n",
|
" <th>dedup::f9aa64cbb57131939eda048250f2dbae</th>\n",
|
||||||
" <td>[330]</td>\n",
|
" <td>[re3data_____::063765fa6d6358d62ea2d41dde32d3a...</td>\n",
|
||||||
" <td>[330]</td>\n",
|
" <td>[r3d100012692, https://fairsharing.org/10.2550...</td>\n",
|
||||||
" <td>[DigitalCommons@Fort Lewis College: Scholarshi...</td>\n",
|
" <td>[Scholars' Mine, Scholars' Mine, scholars mine]</td>\n",
|
||||||
" <td>[roar]</td>\n",
|
" <td>[re3data, FAIRsharing, OpenDOAR]</td>\n",
|
||||||
|
" <td>{re3data, OpenDOAR, FAIRsharing}</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
"<p>109 rows × 4 columns</p>\n",
|
"<p>65 rows × 5 columns</p>\n",
|
||||||
"</div>"
|
"</div>"
|
||||||
],
|
],
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" duplicate_id \\\n",
|
" duplicate_id \\\n",
|
||||||
"dedup_id \n",
|
"dedup_id \n",
|
||||||
"dedup::01846ae470651e97d2f73fce979406a9 [opendoar____::b4d6f2b565ca0eef1f9245403aac366a] \n",
|
"dedup::06138bc5af6023646ede0e1f7c1eac75 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
||||||
"dedup::022036087426786cfd0f7f41fa7a2665 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
"dedup::0b7e684c89e746c67c9761ce2b65479c [re3data_____::44217da669f17a260c0958a679003a7... \n",
|
||||||
"dedup::07e8b472e1e4af17a6b20ce083baf29f [15036] \n",
|
"dedup::0bb4aec1710521c12ee76289d9440817 [re3data_____::eb721a14697a05c477d0ae23830e665... \n",
|
||||||
"dedup::0894634a3244e3050d8057a453e17e57 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
"dedup::139042a4157a773f209847829d80894d [756, re3data_____::a95b34b344dc049963c35997fe... \n",
|
||||||
"dedup::0a54b19a13b6712dc04d1b49215423d8 [opendoar____::d34ab169b70c9dcd35e62896010cd9ff] \n",
|
"dedup::13d4bfa0321f86f042b34ec79064b316 [opendoar____::e3844e186e6eb8736e9f53c0c588952... \n",
|
||||||
"... ... \n",
|
"... ... \n",
|
||||||
"dedup::f8306c8f16096b6d944799f4d427a976 [re3data_____::574b553c6c374d597d2068ab2b117889] \n",
|
"dedup::eded0708dfe855304a50029fccf1a677 [opendoar____::eccbc87e4b5ce2fe28308fd9f2a7baf... \n",
|
||||||
"dedup::f9d8e2daaa9144310b66bf948e50d656 [re3data_____::95014789f83d7611ebfddace19d0523a] \n",
|
"dedup::ef46a43afd7c7d67e21f4306bb1364e9 [opendoar____::1f74a54f39b3123ad272ca0a06e7463... \n",
|
||||||
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [opendoar____::9f96f36b7aae3b1ff847c26ac94c604e] \n",
|
"dedup::f296bb3903d8a84d81c47e6db90764b9 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
||||||
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [4637] \n",
|
"dedup::f3dc6512e46961c363ea402ff218c8fb [re3data_____::b2fc675049cbf485d9abbccf5232a31... \n",
|
||||||
"dedup::fe73f687e5bc5280214e0486b273a5f9 [330] \n",
|
"dedup::f9aa64cbb57131939eda048250f2dbae [re3data_____::063765fa6d6358d62ea2d41dde32d3a... \n",
|
||||||
"\n",
|
"\n",
|
||||||
" original_id \\\n",
|
" original_id \\\n",
|
||||||
"dedup_id \n",
|
"dedup_id \n",
|
||||||
"dedup::01846ae470651e97d2f73fce979406a9 [7668] \n",
|
"dedup::06138bc5af6023646ede0e1f7c1eac75 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
||||||
"dedup::022036087426786cfd0f7f41fa7a2665 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
"dedup::0b7e684c89e746c67c9761ce2b65479c [r3d100010423, 375, https://fairsharing.org/10... \n",
|
||||||
"dedup::07e8b472e1e4af17a6b20ce083baf29f [15036] \n",
|
"dedup::0bb4aec1710521c12ee76289d9440817 [r3d100012596, 650, 349] \n",
|
||||||
"dedup::0894634a3244e3050d8057a453e17e57 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
"dedup::139042a4157a773f209847829d80894d [756, r3d100010690, 1330, 5487] \n",
|
||||||
"dedup::0a54b19a13b6712dc04d1b49215423d8 [377] \n",
|
"dedup::13d4bfa0321f86f042b34ec79064b316 [9411, 15255, r3d100013135] \n",
|
||||||
"... ... \n",
|
"... ... \n",
|
||||||
"dedup::f8306c8f16096b6d944799f4d427a976 [r3d100012041] \n",
|
"dedup::eded0708dfe855304a50029fccf1a677 [3, r3d100012604, 5509] \n",
|
||||||
"dedup::f9d8e2daaa9144310b66bf948e50d656 [r3d100011045] \n",
|
"dedup::ef46a43afd7c7d67e21f4306bb1364e9 [5870, https://fairsharing.org/10.25504/FAIRsh... \n",
|
||||||
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [4979] \n",
|
"dedup::f296bb3903d8a84d81c47e6db90764b9 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
|
||||||
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [4637] \n",
|
"dedup::f3dc6512e46961c363ea402ff218c8fb [r3d100012538, 10171, https://fairsharing.org/... \n",
|
||||||
"dedup::fe73f687e5bc5280214e0486b273a5f9 [330] \n",
|
"dedup::f9aa64cbb57131939eda048250f2dbae [r3d100012692, https://fairsharing.org/10.2550... \n",
|
||||||
"\n",
|
"\n",
|
||||||
" name \\\n",
|
" name \\\n",
|
||||||
"dedup_id \n",
|
"dedup_id \n",
|
||||||
"dedup::01846ae470651e97d2f73fce979406a9 [digital commons at michigan state university ... \n",
|
"dedup::06138bc5af6023646ede0e1f7c1eac75 [Crystallography Open Database, Crystallograph... \n",
|
||||||
"dedup::022036087426786cfd0f7f41fa7a2665 [World Data Center for Climate at DRKZ] \n",
|
"dedup::0b7e684c89e746c67c9761ce2b65479c [Woods Hole Open Access Server, woods hole ope... \n",
|
||||||
"dedup::07e8b472e1e4af17a6b20ce083baf29f [MiCISAN] \n",
|
"dedup::0bb4aec1710521c12ee76289d9440817 [Digitale Bibliothek Thüringen, digitale bibli... \n",
|
||||||
"dedup::0894634a3244e3050d8057a453e17e57 [European Variation Archive] \n",
|
"dedup::139042a4157a773f209847829d80894d [Khazar University Institutional Repository, K... \n",
|
||||||
"dedup::0a54b19a13b6712dc04d1b49215423d8 [yale medicine thesis digital library] \n",
|
"dedup::13d4bfa0321f86f042b34ec79064b316 [fordatis, Fordatis, Fordatis] \n",
|
||||||
"... ... \n",
|
"... ... \n",
|
||||||
"dedup::f8306c8f16096b6d944799f4d427a976 [Canadian Disaster Database] \n",
|
"dedup::eded0708dfe855304a50029fccf1a677 [ams acta, AMS Acta, AMS Acta] \n",
|
||||||
"dedup::f9d8e2daaa9144310b66bf948e50d656 [Index to Marine & Lacustrine Geological Samples] \n",
|
"dedup::ef46a43afd7c7d67e21f4306bb1364e9 [heidata, heiDATA, heiDATA] \n",
|
||||||
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [university of minnesota law school] \n",
|
"dedup::f296bb3903d8a84d81c47e6db90764b9 [PubChem, PubChem, pubchem] \n",
|
||||||
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [Simon Fraser University Institutional Reposit... \n",
|
"dedup::f3dc6512e46961c363ea402ff218c8fb [DataverseNO, dataverseno, DataverseNO] \n",
|
||||||
"dedup::fe73f687e5bc5280214e0486b273a5f9 [DigitalCommons@Fort Lewis College: Scholarshi... \n",
|
"dedup::f9aa64cbb57131939eda048250f2dbae [Scholars' Mine, Scholars' Mine, scholars mine] \n",
|
||||||
"\n",
|
"\n",
|
||||||
" source \n",
|
" source \\\n",
|
||||||
"dedup_id \n",
|
"dedup_id \n",
|
||||||
"dedup::01846ae470651e97d2f73fce979406a9 [OpenDOAR] \n",
|
"dedup::06138bc5af6023646ede0e1f7c1eac75 [FAIRsharing, roar, re3data, OpenDOAR] \n",
|
||||||
"dedup::022036087426786cfd0f7f41fa7a2665 [FAIRsharing] \n",
|
"dedup::0b7e684c89e746c67c9761ce2b65479c [re3data, OpenDOAR, FAIRsharing] \n",
|
||||||
"dedup::07e8b472e1e4af17a6b20ce083baf29f [roar] \n",
|
"dedup::0bb4aec1710521c12ee76289d9440817 [re3data, OpenDOAR, roar] \n",
|
||||||
"dedup::0894634a3244e3050d8057a453e17e57 [FAIRsharing] \n",
|
"dedup::139042a4157a773f209847829d80894d [roar, re3data, OpenDOAR, roar] \n",
|
||||||
"dedup::0a54b19a13b6712dc04d1b49215423d8 [OpenDOAR] \n",
|
"dedup::13d4bfa0321f86f042b34ec79064b316 [OpenDOAR, roar, re3data] \n",
|
||||||
"... ... \n",
|
"... ... \n",
|
||||||
"dedup::f8306c8f16096b6d944799f4d427a976 [re3data] \n",
|
"dedup::eded0708dfe855304a50029fccf1a677 [OpenDOAR, re3data, roar] \n",
|
||||||
"dedup::f9d8e2daaa9144310b66bf948e50d656 [re3data] \n",
|
"dedup::ef46a43afd7c7d67e21f4306bb1364e9 [OpenDOAR, FAIRsharing, re3data] \n",
|
||||||
"dedup::fcdbc4f504a15df8f78da88ee72fad32 [OpenDOAR] \n",
|
"dedup::f296bb3903d8a84d81c47e6db90764b9 [FAIRsharing, re3data, OpenDOAR] \n",
|
||||||
"dedup::fcfe9c770eb9372e6961a17f7eaffd5f [roar] \n",
|
"dedup::f3dc6512e46961c363ea402ff218c8fb [re3data, OpenDOAR, FAIRsharing] \n",
|
||||||
"dedup::fe73f687e5bc5280214e0486b273a5f9 [roar] \n",
|
"dedup::f9aa64cbb57131939eda048250f2dbae [re3data, FAIRsharing, OpenDOAR] \n",
|
||||||
"\n",
|
"\n",
|
||||||
"[109 rows x 4 columns]"
|
" source_set \n",
|
||||||
|
"dedup_id \n",
|
||||||
|
"dedup::06138bc5af6023646ede0e1f7c1eac75 {re3data, OpenDOAR, roar, FAIRsharing} \n",
|
||||||
|
"dedup::0b7e684c89e746c67c9761ce2b65479c {re3data, FAIRsharing, OpenDOAR} \n",
|
||||||
|
"dedup::0bb4aec1710521c12ee76289d9440817 {re3data, roar, OpenDOAR} \n",
|
||||||
|
"dedup::139042a4157a773f209847829d80894d {re3data, roar, OpenDOAR} \n",
|
||||||
|
"dedup::13d4bfa0321f86f042b34ec79064b316 {OpenDOAR, re3data, roar} \n",
|
||||||
|
"... ... \n",
|
||||||
|
"dedup::eded0708dfe855304a50029fccf1a677 {OpenDOAR, roar, re3data} \n",
|
||||||
|
"dedup::ef46a43afd7c7d67e21f4306bb1364e9 {OpenDOAR, re3data, FAIRsharing} \n",
|
||||||
|
"dedup::f296bb3903d8a84d81c47e6db90764b9 {re3data, OpenDOAR, FAIRsharing} \n",
|
||||||
|
"dedup::f3dc6512e46961c363ea402ff218c8fb {re3data, FAIRsharing, OpenDOAR} \n",
|
||||||
|
"dedup::f9aa64cbb57131939eda048250f2dbae {re3data, OpenDOAR, FAIRsharing} \n",
|
||||||
|
"\n",
|
||||||
|
"[65 rows x 5 columns]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 9,
|
"execution_count": 45,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"dup[dup.source.str.len() == 1]"
|
"dup[dup.source_set.str.len() >= 3]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 33,
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"array([list(['r3d100013204', 'r3d100013458', 'r3d100012807', 'r3d100012808', 'r3d100012806', 'r3d100012805']),\n",
|
|
||||||
" list(['243', '5702', '5715', '5694', '5689', '5658', '5710', '5750', '5721', '5704']),\n",
|
|
||||||
" list(['2738', '4991', '2727', '2729', '2724', '2728', '2740', '174']),\n",
|
|
||||||
" list(['19', '8', '7', '11', '10', '13', '6', '12', '20', '15', '9', '5', '14', '16'])],\n",
|
|
||||||
" dtype=object)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 10,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"dup[dup.source.str.len() >= 6].original_id.values"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 11,
|
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
@ -1050,7 +860,7 @@
|
||||||
"5 None "
|
"5 None "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 11,
|
"execution_count": 33,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue