registries_analysis/notebooks/03-overlap.ipynb

14616 lines
685 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"import csv\n",
"import json\n",
"import glom\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n",
"import plotly.express as px\n",
"\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1723</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2021-09-30T11:39:06.829Z</td>\n",
" <td>10.25504/FAIRsharing.8t18te</td>\n",
" <td>Cell Image Library</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'David Orloff', 'contact-ema...</td>\n",
" <td>http://www.cellimagelibrary.org</td>\n",
" <td>1723</td>\n",
" <td>This library is a public and easily accessible...</td>\n",
" <td>[{'url': 'http://www.cellimagelibrary.org/page...</td>\n",
" <td>2010.0</td>\n",
" <td>[{'name': 'live update', 'type': 'data release...</td>\n",
" <td>[biodbcore-000180, bsg-d000180]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Cell Biology, Life Science]</td>\n",
" <td>[Cell, Microscopy, Light microscopy, Electron ...</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Cell Image Library</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.8...</td>\n",
" <td>10.25504/FAIRsharing.8t18te</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: This librar...</td>\n",
" <td>[{'id': 232, 'pubmed_id': 23203874, 'title': '...</td>\n",
" <td>[{'licence-name': 'Cell Image Library Data Pol...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FAIRsharing_1723</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3101</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2020-09-16T08:49:13.000Z</td>\n",
" <td>2021-09-30T11:36:45.452Z</td>\n",
" <td>NaN</td>\n",
" <td>WHOI Ship Data-Grabber System</td>\n",
" <td>ready</td>\n",
" <td>NaN</td>\n",
" <td>http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html</td>\n",
" <td>3101</td>\n",
" <td>The WHOI Ship DataGrabber system provides the ...</td>\n",
" <td>[{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o...</td>\n",
" <td>2004.0</td>\n",
" <td>[{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai...</td>\n",
" <td>[biodbcore-001609, bsg-d001609]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Earth Science, Water Research, Oceanography]</td>\n",
" <td>[]</td>\n",
" <td>[Not applicable]</td>\n",
" <td>[subseafloor environments]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: WHOI Ship Data-Grabber...</td>\n",
" <td>None</td>\n",
" <td>https://fairsharing.org/fairsharing_records/3101</td>\n",
" <td>None</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: The WHOI Sh...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'NDSF Data Archive Policy', ...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FAIRsharing_3101</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2649</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2018-08-07T20:23:32.000Z</td>\n",
" <td>2021-09-30T11:39:07.898Z</td>\n",
" <td>NaN</td>\n",
" <td>Electron Microscope Public Image Archive</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-name': 'General contact', 'contact-...</td>\n",
" <td>https://www.ebi.ac.uk/pdbe/emdb/empiar/</td>\n",
" <td>2649</td>\n",
" <td>EMPIAR, the Electron Microscopy Public Image A...</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/support/EMPIAR...</td>\n",
" <td>2015.0</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
" <td>[biodbcore-001140, bsg-d001140]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Bioinformatics, Biology]</td>\n",
" <td>[Protein image, Microscopy, Electron microscop...</td>\n",
" <td>[All]</td>\n",
" <td>[]</td>\n",
" <td>[Greece, Czech Republic, United Kingdom, Icela...</td>\n",
" <td>FAIRsharing record for: Electron Microscope Pu...</td>\n",
" <td>EMPIAR</td>\n",
" <td>https://fairsharing.org/fairsharing_records/2649</td>\n",
" <td>None</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: EMPIAR, the...</td>\n",
" <td>[{'id': 2232, 'pubmed_id': 27067018, 'title': ...</td>\n",
" <td>[{'licence-name': 'EMBL-EBI Terms of Use', 'li...</td>\n",
" <td>[{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27...</td>\n",
" <td>EMPIAR</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
" <td>[{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FAIRsharing_2649</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2657</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2018-08-13T15:12:11.000Z</td>\n",
" <td>2021-09-30T11:37:28.736Z</td>\n",
" <td>10.25504/FAIRsharing.tnByoG</td>\n",
" <td>ClinicalStudyDataRequest.com</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-email': 'support@clinicalstudydatar...</td>\n",
" <td>https://clinicalstudydatarequest.com/</td>\n",
" <td>2657</td>\n",
" <td>ClinicalStudyDataRequest.com (CSDR) is a conso...</td>\n",
" <td>[{'url': 'https://clinicalstudydatarequest.com...</td>\n",
" <td>2014.0</td>\n",
" <td>[{'url': 'https://clinicalstudydatarequest.com...</td>\n",
" <td>[biodbcore-001149, bsg-d001149]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Preclinical Studies, Biomedical Science]</td>\n",
" <td>[]</td>\n",
" <td>[Homo sapiens]</td>\n",
" <td>[]</td>\n",
" <td>[Worldwide]</td>\n",
" <td>FAIRsharing record for: ClinicalStudyDataReque...</td>\n",
" <td>CSDR</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.t...</td>\n",
" <td>10.25504/FAIRsharing.tnByoG</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: ClinicalStu...</td>\n",
" <td>[]</td>\n",
" <td>[{'licence-name': 'CSDR Data Sharing Agreement...</td>\n",
" <td>NaN</td>\n",
" <td>CSDR</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FAIRsharing_2657</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2078</td>\n",
" <td>fairsharing-records</td>\n",
" <td>2014-11-04T15:23:40.000Z</td>\n",
" <td>2021-09-30T11:34:43.129Z</td>\n",
" <td>10.25504/FAIRsharing.3axym7</td>\n",
" <td>Germplasm Resources Information Network</td>\n",
" <td>ready</td>\n",
" <td>[{'contact-email': 'dbmu@ars-grin.gov'}]</td>\n",
" <td>https://www.ars-grin.gov/</td>\n",
" <td>2078</td>\n",
" <td>GRIN provides National Genetic Resources Progr...</td>\n",
" <td>[{'url': 'https://www.ars-grin.gov/Pages/Colle...</td>\n",
" <td>2010.0</td>\n",
" <td>[{'url': 'https://www.ars-grin.gov/', 'name': ...</td>\n",
" <td>[biodbcore-000546, bsg-d000546]</td>\n",
" <td>Database</td>\n",
" <td>repository</td>\n",
" <td>[Life Science]</td>\n",
" <td>[Cell, Cell culture, Germplasm]</td>\n",
" <td>[Bacteria, Metazoa, Viridiplantae]</td>\n",
" <td>[]</td>\n",
" <td>[United States]</td>\n",
" <td>FAIRsharing record for: Germplasm Resources In...</td>\n",
" <td>GRIN</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.3...</td>\n",
" <td>10.25504/FAIRsharing.3axym7</td>\n",
" <td>https://creativecommons.org/licenses/by-sa/4.0...</td>\n",
" <td>This FAIRsharing record describes: GRIN provid...</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>NaN</td>\n",
" <td>GRIN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FAIRsharing_2078</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" FAIRsharing_id FAIRsharing_type FAIRsharing_attributes.created-at \\\n",
"0 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"1 3101 fairsharing-records 2020-09-16T08:49:13.000Z \n",
"2 2649 fairsharing-records 2018-08-07T20:23:32.000Z \n",
"3 2657 fairsharing-records 2018-08-13T15:12:11.000Z \n",
"4 2078 fairsharing-records 2014-11-04T15:23:40.000Z \n",
"\n",
" FAIRsharing_attributes.updated-at FAIRsharing_attributes.metadata.doi \\\n",
"0 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n",
"1 2021-09-30T11:36:45.452Z NaN \n",
"2 2021-09-30T11:39:07.898Z NaN \n",
"3 2021-09-30T11:37:28.736Z 10.25504/FAIRsharing.tnByoG \n",
"4 2021-09-30T11:34:43.129Z 10.25504/FAIRsharing.3axym7 \n",
"\n",
" FAIRsharing_attributes.metadata.name \\\n",
"0 Cell Image Library \n",
"1 WHOI Ship Data-Grabber System \n",
"2 Electron Microscope Public Image Archive \n",
"3 ClinicalStudyDataRequest.com \n",
"4 Germplasm Resources Information Network \n",
"\n",
" FAIRsharing_attributes.metadata.status \\\n",
"0 ready \n",
"1 ready \n",
"2 ready \n",
"3 ready \n",
"4 ready \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 [{'contact-name': 'David Orloff', 'contact-ema... \n",
"1 NaN \n",
"2 [{'contact-name': 'General contact', 'contact-... \n",
"3 [{'contact-email': 'support@clinicalstudydatar... \n",
"4 [{'contact-email': 'dbmu@ars-grin.gov'}] \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 http://www.cellimagelibrary.org \n",
"1 http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html \n",
"2 https://www.ebi.ac.uk/pdbe/emdb/empiar/ \n",
"3 https://clinicalstudydatarequest.com/ \n",
"4 https://www.ars-grin.gov/ \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 1723 \n",
"1 3101 \n",
"2 2649 \n",
"3 2657 \n",
"4 2078 \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 This library is a public and easily accessible... \n",
"1 The WHOI Ship DataGrabber system provides the ... \n",
"2 EMPIAR, the Electron Microscopy Public Image A... \n",
"3 ClinicalStudyDataRequest.com (CSDR) is a conso... \n",
"4 GRIN provides National Genetic Resources Progr... \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 [{'url': 'http://www.cellimagelibrary.org/page... \n",
"1 [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... \n",
"2 [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... \n",
"3 [{'url': 'https://clinicalstudydatarequest.com... \n",
"4 [{'url': 'https://www.ars-grin.gov/Pages/Colle... \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 2010.0 \n",
"1 2004.0 \n",
"2 2015.0 \n",
"3 2014.0 \n",
"4 2010.0 \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 [{'name': 'live update', 'type': 'data release... \n",
"1 [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... \n",
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
"3 [{'url': 'https://clinicalstudydatarequest.com... \n",
"4 [{'url': 'https://www.ars-grin.gov/', 'name': ... \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 [biodbcore-000180, bsg-d000180] \n",
"1 [biodbcore-001609, bsg-d001609] \n",
"2 [biodbcore-001140, bsg-d001140] \n",
"3 [biodbcore-001149, bsg-d001149] \n",
"4 [biodbcore-000546, bsg-d000546] \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 Database \n",
"1 Database \n",
"2 Database \n",
"3 Database \n",
"4 Database \n",
"\n",
" FAIRsharing_attributes.record-type \\\n",
"0 repository \n",
"1 repository \n",
"2 repository \n",
"3 repository \n",
"4 repository \n",
"\n",
" FAIRsharing_attributes.subjects \\\n",
"0 [Cell Biology, Life Science] \n",
"1 [Earth Science, Water Research, Oceanography] \n",
"2 [Bioinformatics, Biology] \n",
"3 [Preclinical Studies, Biomedical Science] \n",
"4 [Life Science] \n",
"\n",
" FAIRsharing_attributes.domains \\\n",
"0 [Cell, Microscopy, Light microscopy, Electron ... \n",
"1 [] \n",
"2 [Protein image, Microscopy, Electron microscop... \n",
"3 [] \n",
"4 [Cell, Cell culture, Germplasm] \n",
"\n",
" FAIRsharing_attributes.taxonomies \\\n",
"0 [All] \n",
"1 [Not applicable] \n",
"2 [All] \n",
"3 [Homo sapiens] \n",
"4 [Bacteria, Metazoa, Viridiplantae] \n",
"\n",
" FAIRsharing_attributes.user-defined-tags \\\n",
"0 [] \n",
"1 [subseafloor environments] \n",
"2 [] \n",
"3 [] \n",
"4 [] \n",
"\n",
" FAIRsharing_attributes.countries \\\n",
"0 [United States] \n",
"1 [United States] \n",
"2 [Greece, Czech Republic, United Kingdom, Icela... \n",
"3 [Worldwide] \n",
"4 [United States] \n",
"\n",
" FAIRsharing_attributes.name \\\n",
"0 FAIRsharing record for: Cell Image Library \n",
"1 FAIRsharing record for: WHOI Ship Data-Grabber... \n",
"2 FAIRsharing record for: Electron Microscope Pu... \n",
"3 FAIRsharing record for: ClinicalStudyDataReque... \n",
"4 FAIRsharing record for: Germplasm Resources In... \n",
"\n",
" FAIRsharing_attributes.abbreviation \\\n",
"0 None \n",
"1 None \n",
"2 EMPIAR \n",
"3 CSDR \n",
"4 GRIN \n",
"\n",
" FAIRsharing_attributes.url \\\n",
"0 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
"1 https://fairsharing.org/fairsharing_records/3101 \n",
"2 https://fairsharing.org/fairsharing_records/2649 \n",
"3 https://fairsharing.org/10.25504/FAIRsharing.t... \n",
"4 https://fairsharing.org/10.25504/FAIRsharing.3... \n",
"\n",
" FAIRsharing_attributes.doi \\\n",
"0 10.25504/FAIRsharing.8t18te \n",
"1 None \n",
"2 None \n",
"3 10.25504/FAIRsharing.tnByoG \n",
"4 10.25504/FAIRsharing.3axym7 \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
"1 https://creativecommons.org/licenses/by-sa/4.0... \n",
"2 https://creativecommons.org/licenses/by-sa/4.0... \n",
"3 https://creativecommons.org/licenses/by-sa/4.0... \n",
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
"\n",
" FAIRsharing_attributes.description \\\n",
"0 This FAIRsharing record describes: This librar... \n",
"1 This FAIRsharing record describes: The WHOI Sh... \n",
"2 This FAIRsharing record describes: EMPIAR, the... \n",
"3 This FAIRsharing record describes: ClinicalStu... \n",
"4 This FAIRsharing record describes: GRIN provid... \n",
"\n",
" FAIRsharing_attributes.publications \\\n",
"0 [{'id': 232, 'pubmed_id': 23203874, 'title': '... \n",
"1 [] \n",
"2 [{'id': 2232, 'pubmed_id': 27067018, 'title': ... \n",
"3 [] \n",
"4 [] \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 [{'licence-name': 'Cell Image Library Data Pol... \n",
"1 [{'licence-name': 'NDSF Data Archive Policy', ... \n",
"2 [{'licence-name': 'EMBL-EBI Terms of Use', 'li... \n",
"3 [{'licence-name': 'CSDR Data Sharing Agreement... \n",
"4 [] \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 EMPIAR \n",
"3 CSDR \n",
"4 GRIN \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 NaN \n",
"1 NaN \n",
"2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \n",
"0 NaN FAIRsharing_1723 \n",
"1 NaN FAIRsharing_3101 \n",
"2 NaN FAIRsharing_2649 \n",
"3 NaN FAIRsharing_2657 \n",
"4 NaN FAIRsharing_2078 "
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"with open('../data/raw/fairsharing_dump_api_09_2021.json') as f:\n",
" lines = f.read().splitlines()\n",
" \n",
"fairsharing_df = pd.DataFrame(lines)\n",
"fairsharing_df.columns = ['json_element']\n",
"fairsharing_df['json_element'].apply(json.loads)\n",
"fairsharing_df = pd.json_normalize(fairsharing_df['json_element'].apply(json.loads))\n",
"\n",
"fairsharing_df['unique_id'] = 'FAIRsharing_' + fairsharing_df.id\n",
"fairsharing_df = fairsharing_df.add_prefix('FAIRsharing_')\n",
"fairsharing_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>r3d100000001</td>\n",
" <td>Odum Institute Archive Dataverse</td>\n",
" <td>eng</td>\n",
" <td>[]</td>\n",
" <td>https://dataverse.unc.edu/dataverse/odum</td>\n",
" <td>[]</td>\n",
" <td>[\"https://dataverse.unc.edu/dataverse/odum#\", ...</td>\n",
" <td>The Odum Institute Archive Dataverse contains ...</td>\n",
" <td>eng</td>\n",
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"13 dataverses; 3.050 datasets\", \"upd...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[1 Humanities and Social Sciences, 111 Social ...</td>\n",
" <td>NaN</td>\n",
" <td>[Databases, Plain text, Scientific and statist...</td>\n",
" <td>[dataProvider]</td>\n",
" <td>[FAIR, Middle East, crime, demography, economy...</td>\n",
" <td>[{'institutionName': 'Odum Institute for Resea...</td>\n",
" <td>[{\"policyName\": \"Collection Development Policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[{\"databaseLicenseName\": \"CC0\", \"databaseLicen...</td>\n",
" <td>[{\"dataAccessType\": \"embargoed\", \"dataAccessRe...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"DataVerse\"]</td>\n",
" <td>NaN</td>\n",
" <td>[]</td>\n",
" <td>[\"DOI\"]</td>\n",
" <td>NaN</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[\"other\"]</td>\n",
" <td>[{\"metadataStandardName\": \"DDI - Data Document...</td>\n",
" <td>{}</td>\n",
" <td>Odum Dataverse is covered by Thomson Reuters D...</td>\n",
" <td>2013-06-10</td>\n",
" <td>2021-07-06</td>\n",
" <td>re3data_r3d100000001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>r3d100000002</td>\n",
" <td>Access to Archival Databases</td>\n",
" <td>eng</td>\n",
" <td>[{'additionalName': 'AAD', 'additionalNameLang...</td>\n",
" <td>https://aad.archives.gov/aad/</td>\n",
" <td>[RRID:SCR_010479, RRID:nlx_157752]</td>\n",
" <td>[\"https://www.archives.gov/contact\"]</td>\n",
" <td>You will find in the Access to Archival Databa...</td>\n",
" <td>eng</td>\n",
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"\", \"updatedp\": \"\"}</td>\n",
" <td>1985</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\", \"spa\"]</td>\n",
" <td>[1 Humanities and Social Sciences, 102 History...</td>\n",
" <td>https://www.archives.gov/publications/general-...</td>\n",
" <td>[Images, Standard office documents, Structured...</td>\n",
" <td>[dataProvider]</td>\n",
" <td>[US History]</td>\n",
" <td>[{'institutionName': 'The U.S. National Archiv...</td>\n",
" <td>[{\"policyName\": \"Contribution Policy\", \"policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"Copyrights\", \"dataLicens...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"unknown\"]</td>\n",
" <td>no</td>\n",
" <td>[\"https://www.archives.gov/developer#toc-appli...</td>\n",
" <td>[\"none\"]</td>\n",
" <td>https://aad.archives.gov/aad/help/getting-star...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>unknown</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>{\"syndication\": \"http://www.archives.gov/socia...</td>\n",
" <td>NaN</td>\n",
" <td>2012-07-04</td>\n",
" <td>2021-05-25</td>\n",
" <td>re3data_r3d100000002</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>r3d100000004</td>\n",
" <td>Datenbank Gesprochenes Deutsch</td>\n",
" <td>deu</td>\n",
" <td>[{'additionalName': 'DGD', 'additionalNameLang...</td>\n",
" <td>https://dgd.ids-mannheim.de/</td>\n",
" <td>[]</td>\n",
" <td>[\"dgd@ids-mannheim.de\"]</td>\n",
" <td>The \"Database for Spoken German (DGD)\" is a co...</td>\n",
" <td>eng</td>\n",
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"34 corpora\", \"updatedp\": \"2020-02-03\"}</td>\n",
" <td>2012</td>\n",
" <td>NaN</td>\n",
" <td>[\"deu\"]</td>\n",
" <td>[1 Humanities and Social Sciences, 104 Linguis...</td>\n",
" <td>https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext...</td>\n",
" <td>[Audiovisual data, Standard office documents, ...</td>\n",
" <td>[dataProvider, serviceProvider]</td>\n",
" <td>[Australian German, FOLK, German dialects, Pfe...</td>\n",
" <td>[{'institutionName': 'Institut für Deutsche Sp...</td>\n",
" <td>[{\"policyName\": \"Erfurter Aufruf zur Sicherung...</td>\n",
" <td>{\"databaseAccessType\": \"restricted\", \"databas...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"restricted\", \"dataAccessR...</td>\n",
" <td>[{\"dataLicenseName\": \"other\", \"dataLicenseURL\"...</td>\n",
" <td>restricted</td>\n",
" <td>[]</td>\n",
" <td>[\"other\"]</td>\n",
" <td>yes</td>\n",
" <td>[]</td>\n",
" <td>[\"none\"]</td>\n",
" <td>http://agd.ids-mannheim.de/konditionen.shtml</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>unknown</td>\n",
" <td>[\"RatSWD\"]</td>\n",
" <td>[]</td>\n",
" <td>{}</td>\n",
" <td>NaN</td>\n",
" <td>2012-07-20</td>\n",
" <td>2020-08-27</td>\n",
" <td>re3data_r3d100000004</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>r3d100000005</td>\n",
" <td>UNC Dataverse</td>\n",
" <td>eng</td>\n",
" <td>[{'additionalName': 'University of North Carol...</td>\n",
" <td>https://dataverse.unc.edu/</td>\n",
" <td>[]</td>\n",
" <td>[\"https://dataverse.unc.edu/\", \"odumarchive@un...</td>\n",
" <td>UNC Dataverse is an open-source repository sof...</td>\n",
" <td>eng</td>\n",
" <td>[institutional]</td>\n",
" <td>{\"size\": \"186 dataverses; 25.272 studies; 229....</td>\n",
" <td>2011</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[1 Humanities and Social Sciences, 111 Social ...</td>\n",
" <td>https://odum.unc.edu/about/mission-vision/</td>\n",
" <td>[Archived data, Plain text, Raw data, Scientif...</td>\n",
" <td>[dataProvider, serviceProvider]</td>\n",
" <td>[FAIR, census, demographic survey, demography,...</td>\n",
" <td>[{'institutionName': 'Odum Institute for Resea...</td>\n",
" <td>[{\"policyName\": \"Collection Development Policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[{\"dataUploadLicenseName\": \"Data Deposit Form\"...</td>\n",
" <td>[\"DataVerse\"]</td>\n",
" <td>yes</td>\n",
" <td>[\"https://guides.dataverse.org/en/latest/api/n...</td>\n",
" <td>[\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"]</td>\n",
" <td>https://dataverse.org/best-practices/data-cita...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[]</td>\n",
" <td>[{\"metadataStandardName\": \"DDI - Data Document...</td>\n",
" <td>{}</td>\n",
" <td>UNC Dataverse is covered by Clarivate Data Cit...</td>\n",
" <td>2012-07-23</td>\n",
" <td>2021-08-11</td>\n",
" <td>re3data_r3d100000005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>r3d100000006</td>\n",
" <td>Archaeology Data Service</td>\n",
" <td>eng</td>\n",
" <td>[{'additionalName': 'ADS', 'additionalNameLang...</td>\n",
" <td>https://archaeologydataservice.ac.uk/</td>\n",
" <td>[FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg]</td>\n",
" <td>[\"help@archaeologydataservice.ac.uk\", \"https:/...</td>\n",
" <td>The ADS is an accredited digital repository fo...</td>\n",
" <td>eng</td>\n",
" <td>[disciplinary]</td>\n",
" <td>{\"size\": \"1837 results\", \"updatedp\": \"2020-05-...</td>\n",
" <td>1996-10-01</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[1 Humanities and Social Sciences, 101 Ancient...</td>\n",
" <td>https://archaeologydataservice.ac.uk/about/our...</td>\n",
" <td>[Archived data, Audiovisual data, Databases, I...</td>\n",
" <td>[dataProvider, serviceProvider]</td>\n",
" <td>[FAIR, archaeology, cultural heritage, prehist...</td>\n",
" <td>[{'institutionName': 'Arts and Humanities Rese...</td>\n",
" <td>[{\"policyName\": \"ADS Guides to good practice\",...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[{\"databaseLicenseName\": \"CC\", \"databaseLicens...</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[{\"dataUploadLicenseName\": \"Guidelines for Dep...</td>\n",
" <td>[\"other\"]</td>\n",
" <td>yes</td>\n",
" <td>[\"https://archaeologydataservice.ac.uk/about/e...</td>\n",
" <td>[\"DOI\"]</td>\n",
" <td>https://archaeologydataservice.ac.uk/advice/te...</td>\n",
" <td>[]</td>\n",
" <td>unknown</td>\n",
" <td>yes</td>\n",
" <td>[\"other\"]</td>\n",
" <td>[{\"metadataStandardName\": \"DataCite Metadata S...</td>\n",
" <td>{\"syndication\": \"https://archaeologydataservic...</td>\n",
" <td>ADS is covered by Clarivate Data Citation Inde...</td>\n",
" <td>2012-07-23</td>\n",
" <td>2021-09-02</td>\n",
" <td>re3data_r3d100000006</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 r3d100000001 Odum Institute Archive Dataverse \n",
"1 r3d100000002 Access to Archival Databases \n",
"2 r3d100000004 Datenbank Gesprochenes Deutsch \n",
"3 r3d100000005 UNC Dataverse \n",
"4 r3d100000006 Archaeology Data Service \n",
"\n",
" re3data_repositoryName.language \\\n",
"0 eng \n",
"1 eng \n",
"2 deu \n",
"3 eng \n",
"4 eng \n",
"\n",
" re3data_additionalName \\\n",
"0 [] \n",
"1 [{'additionalName': 'AAD', 'additionalNameLang... \n",
"2 [{'additionalName': 'DGD', 'additionalNameLang... \n",
"3 [{'additionalName': 'University of North Carol... \n",
"4 [{'additionalName': 'ADS', 'additionalNameLang... \n",
"\n",
" re3data_repositoryURL \\\n",
"0 https://dataverse.unc.edu/dataverse/odum \n",
"1 https://aad.archives.gov/aad/ \n",
"2 https://dgd.ids-mannheim.de/ \n",
"3 https://dataverse.unc.edu/ \n",
"4 https://archaeologydataservice.ac.uk/ \n",
"\n",
" re3data_repositoryIdentifier \\\n",
"0 [] \n",
"1 [RRID:SCR_010479, RRID:nlx_157752] \n",
"2 [] \n",
"3 [] \n",
"4 [FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] \n",
"\n",
" re3data_repositoryContact \\\n",
"0 [\"https://dataverse.unc.edu/dataverse/odum#\", ... \n",
"1 [\"https://www.archives.gov/contact\"] \n",
"2 [\"dgd@ids-mannheim.de\"] \n",
"3 [\"https://dataverse.unc.edu/\", \"odumarchive@un... \n",
"4 [\"help@archaeologydataservice.ac.uk\", \"https:/... \n",
"\n",
" re3data_description \\\n",
"0 The Odum Institute Archive Dataverse contains ... \n",
"1 You will find in the Access to Archival Databa... \n",
"2 The \"Database for Spoken German (DGD)\" is a co... \n",
"3 UNC Dataverse is an open-source repository sof... \n",
"4 The ADS is an accredited digital repository fo... \n",
"\n",
" re3data_description.language re3data_type \\\n",
"0 eng [disciplinary] \n",
"1 eng [disciplinary] \n",
"2 eng [disciplinary] \n",
"3 eng [institutional] \n",
"4 eng [disciplinary] \n",
"\n",
" re3data_size re3data_startDate \\\n",
"0 {\"size\": \"13 dataverses; 3.050 datasets\", \"upd... NaN \n",
"1 {\"size\": \"\", \"updatedp\": \"\"} 1985 \n",
"2 {\"size\": \"34 corpora\", \"updatedp\": \"2020-02-03\"} 2012 \n",
"3 {\"size\": \"186 dataverses; 25.272 studies; 229.... 2011 \n",
"4 {\"size\": \"1837 results\", \"updatedp\": \"2020-05-... 1996-10-01 \n",
"\n",
" re3data_endDate re3data_repositoryLanguage \\\n",
"0 NaN [\"eng\"] \n",
"1 NaN [\"eng\", \"spa\"] \n",
"2 NaN [\"deu\"] \n",
"3 NaN [\"eng\"] \n",
"4 NaN [\"eng\"] \n",
"\n",
" re3data_subject \\\n",
"0 [1 Humanities and Social Sciences, 111 Social ... \n",
"1 [1 Humanities and Social Sciences, 102 History... \n",
"2 [1 Humanities and Social Sciences, 104 Linguis... \n",
"3 [1 Humanities and Social Sciences, 111 Social ... \n",
"4 [1 Humanities and Social Sciences, 101 Ancient... \n",
"\n",
" re3data_missionStatementURL \\\n",
"0 NaN \n",
"1 https://www.archives.gov/publications/general-... \n",
"2 https://dgd.ids-mannheim.de/dgd/pragdb.dgd_ext... \n",
"3 https://odum.unc.edu/about/mission-vision/ \n",
"4 https://archaeologydataservice.ac.uk/about/our... \n",
"\n",
" re3data_contentType \\\n",
"0 [Databases, Plain text, Scientific and statist... \n",
"1 [Images, Standard office documents, Structured... \n",
"2 [Audiovisual data, Standard office documents, ... \n",
"3 [Archived data, Plain text, Raw data, Scientif... \n",
"4 [Archived data, Audiovisual data, Databases, I... \n",
"\n",
" re3data_providerType \\\n",
"0 [dataProvider] \n",
"1 [dataProvider] \n",
"2 [dataProvider, serviceProvider] \n",
"3 [dataProvider, serviceProvider] \n",
"4 [dataProvider, serviceProvider] \n",
"\n",
" re3data_keyword \\\n",
"0 [FAIR, Middle East, crime, demography, economy... \n",
"1 [US History] \n",
"2 [Australian German, FOLK, German dialects, Pfe... \n",
"3 [FAIR, census, demographic survey, demography,... \n",
"4 [FAIR, archaeology, cultural heritage, prehist... \n",
"\n",
" re3data_institution \\\n",
"0 [{'institutionName': 'Odum Institute for Resea... \n",
"1 [{'institutionName': 'The U.S. National Archiv... \n",
"2 [{'institutionName': 'Institut für Deutsche Sp... \n",
"3 [{'institutionName': 'Odum Institute for Resea... \n",
"4 [{'institutionName': 'Arts and Humanities Rese... \n",
"\n",
" re3data_policy \\\n",
"0 [{\"policyName\": \"Collection Development Policy... \n",
"1 [{\"policyName\": \"Contribution Policy\", \"policy... \n",
"2 [{\"policyName\": \"Erfurter Aufruf zur Sicherung... \n",
"3 [{\"policyName\": \"Collection Development Policy... \n",
"4 [{\"policyName\": \"ADS Guides to good practice\",... \n",
"\n",
" re3data_databaseAccess \\\n",
"0 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"1 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"2 {\"databaseAccessType\": \"restricted\", \"databas... \n",
"3 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"4 {\"databaseAccessType\": \"open\", \"databaseAcces... \n",
"\n",
" re3data_databaseLicense \\\n",
"0 [{\"databaseLicenseName\": \"CC0\", \"databaseLicen... \n",
"1 [] \n",
"2 [] \n",
"3 [] \n",
"4 [{\"databaseLicenseName\": \"CC\", \"databaseLicens... \n",
"\n",
" re3data_dataAccess \\\n",
"0 [{\"dataAccessType\": \"embargoed\", \"dataAccessRe... \n",
"1 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"2 [{\"dataAccessType\": \"restricted\", \"dataAccessR... \n",
"3 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"4 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"\n",
" re3data_dataLicense re3data_dataUploadType \\\n",
"0 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"1 [{\"dataLicenseName\": \"Copyrights\", \"dataLicens... restricted \n",
"2 [{\"dataLicenseName\": \"other\", \"dataLicenseURL\"... restricted \n",
"3 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"4 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"\n",
" re3data_dataUploadLicense re3data_software \\\n",
"0 [] [\"DataVerse\"] \n",
"1 [] [\"unknown\"] \n",
"2 [] [\"other\"] \n",
"3 [{\"dataUploadLicenseName\": \"Data Deposit Form\"... [\"DataVerse\"] \n",
"4 [{\"dataUploadLicenseName\": \"Guidelines for Dep... [\"other\"] \n",
"\n",
" re3data_versioning re3data_api \\\n",
"0 NaN [] \n",
"1 no [\"https://www.archives.gov/developer#toc-appli... \n",
"2 yes [] \n",
"3 yes [\"https://guides.dataverse.org/en/latest/api/n... \n",
"4 yes [\"https://archaeologydataservice.ac.uk/about/e... \n",
"\n",
" re3data_pidSystem \\\n",
"0 [\"DOI\"] \n",
"1 [\"none\"] \n",
"2 [\"none\"] \n",
"3 [\"ARK\", \"DOI\", \"PURL\", \"URN\", \"hdl\"] \n",
"4 [\"DOI\"] \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem \\\n",
"0 NaN [] \n",
"1 https://aad.archives.gov/aad/help/getting-star... [] \n",
"2 http://agd.ids-mannheim.de/konditionen.shtml [] \n",
"3 https://dataverse.org/best-practices/data-cita... [] \n",
"4 https://archaeologydataservice.ac.uk/advice/te... [] \n",
"\n",
" re3data_enhancedPublication re3data_qualityManagement re3data_certificate \\\n",
"0 unknown yes [\"other\"] \n",
"1 unknown unknown [] \n",
"2 unknown unknown [\"RatSWD\"] \n",
"3 unknown yes [] \n",
"4 unknown yes [\"other\"] \n",
"\n",
" re3data_metadataStandard \\\n",
"0 [{\"metadataStandardName\": \"DDI - Data Document... \n",
"1 [] \n",
"2 [] \n",
"3 [{\"metadataStandardName\": \"DDI - Data Document... \n",
"4 [{\"metadataStandardName\": \"DataCite Metadata S... \n",
"\n",
" re3data_syndication \\\n",
"0 {} \n",
"1 {\"syndication\": \"http://www.archives.gov/socia... \n",
"2 {} \n",
"3 {} \n",
"4 {\"syndication\": \"https://archaeologydataservic... \n",
"\n",
" re3data_remarks re3data_entryDate \\\n",
"0 Odum Dataverse is covered by Thomson Reuters D... 2013-06-10 \n",
"1 NaN 2012-07-04 \n",
"2 NaN 2012-07-20 \n",
"3 UNC Dataverse is covered by Clarivate Data Cit... 2012-07-23 \n",
"4 ADS is covered by Clarivate Data Citation Inde... 2012-07-23 \n",
"\n",
" re3data_lastUpdate re3data_unique_id \n",
"0 2021-07-06 re3data_r3d100000001 \n",
"1 2021-05-25 re3data_r3d100000002 \n",
"2 2020-08-27 re3data_r3d100000004 \n",
"3 2021-08-11 re3data_r3d100000005 \n",
"4 2021-09-02 re3data_r3d100000006 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the re3data export. The columns listed below are decoded with\n",
"# ast.literal_eval while reading; every other column is left as read.\n",
"re3data_df = (\n",
"    pd.read_csv(\n",
"        '../data/raw/re3data.tsv',\n",
"        delimiter='\\t',\n",
"        converters=dict.fromkeys(\n",
"            ('subject', 'keyword', 'additionalName', 'repositoryIdentifier',\n",
"             'type', 'contentType', 'providerType', 'institution'),\n",
"            ast.literal_eval,\n",
"        ),\n",
"    )\n",
"    # Registry-qualified key, e.g. 're3data_r3d100000001'.\n",
"    .assign(unique_id=lambda frame: 're3data_' + frame.orgIdentifier)\n",
"    # Prefix every column name (including unique_id) with 're3data_'.\n",
"    .add_prefix('re3data_')\n",
")\n",
"re3data_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>175</td>\n",
" <td>{\"name\": \"hku theses online\", \"language\": \"en\"}</td>\n",
" <td>[]</td>\n",
" <td>http://hub.hku.hk/handle/10722/1057</td>\n",
" <td>this is an institutional repository providing ...</td>\n",
" <td>institutional</td>\n",
" <td>[\"zh\", \"en\"]</td>\n",
" <td>2021-03-25 10:16:18</td>\n",
" <td>2005-12-21 12:44:08</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"bibliographic_references\", \"theses_and_disse...</td>\n",
" <td>[{\"name\": \"university of hong kong\", \"alternat...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap...</td>\n",
" <td>NaN</td>\n",
" <td>OpenDOAR_175</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>64</td>\n",
" <td>{\"name\": \"research support scheme - central eu...</td>\n",
" <td>[]</td>\n",
" <td>http://rss.archives.ceu.hu/</td>\n",
" <td>this is an institutional repository collecting...</td>\n",
" <td>institutional</td>\n",
" <td>[\"cs\", \"en\", \"hu\", \"ru\"]</td>\n",
" <td>2021-03-25 09:48:31</td>\n",
" <td>2006-01-04 14:59:30</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"unpub_reports_and_working_papers\"]</td>\n",
" <td>[{\"name\": \"central european university\", \"alte...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"eprints\", \"version\": \"2.2.1\"}</td>\n",
" <td>http://rss.archives.ceu.hu/perl/oai2</td>\n",
" <td>OpenDOAR_64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>151</td>\n",
" <td>{\"name\": \"cadmus, eui research repository\", \"l...</td>\n",
" <td>[]</td>\n",
" <td>http://cadmus.eui.eu/</td>\n",
" <td>cadmus is the name of the eui research reposit...</td>\n",
" <td>institutional</td>\n",
" <td>[\"nl\", \"en\", \"fr\", \"de\", \"it\"]</td>\n",
" <td>2021-09-13 13:35:36</td>\n",
" <td>2006-01-04 12:07:07</td>\n",
" <td>[\"history and archaeology\", \"multidisciplinary...</td>\n",
" <td>[\"journal_articles\", \"theses_and_dissertations...</td>\n",
" <td>[{\"name\": \"european university institute\", \"al...</td>\n",
" <td>[{\"policy_url\": \"https://www.eui.eu/research/e...</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"5.2\"}</td>\n",
" <td>http://cadmus.eui.eu/oai/request</td>\n",
" <td>OpenDOAR_151</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>105</td>\n",
" <td>{\"name\": \"document server@uhasselt\", \"language...</td>\n",
" <td>[]</td>\n",
" <td>https://doclib.uhasselt.be/dspace/</td>\n",
" <td>this site is a university repository providing...</td>\n",
" <td>institutional</td>\n",
" <td>[\"nl\", \"en\", \"fr\", \"de\"]</td>\n",
" <td>2021-04-16 15:23:52</td>\n",
" <td>2006-01-24 15:46:44</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"uhasselt\", \"alternativeName\": \"hass...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"1.7.2\"}</td>\n",
" <td>http://doclib.uhasselt.be/dspace-oai/request</td>\n",
" <td>OpenDOAR_105</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>101</td>\n",
" <td>{\"name\": \"utrecht university repository\", \"lan...</td>\n",
" <td>[]</td>\n",
" <td>http://dspace.library.uu.nl</td>\n",
" <td>this site is a university repository providing...</td>\n",
" <td>institutional</td>\n",
" <td>[\"nl\", \"en\"]</td>\n",
" <td>2021-04-16 15:22:03</td>\n",
" <td>2006-01-13 12:55:13</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"university of utrecht\", \"alternativ...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"\"}</td>\n",
" <td>https://dspace.library.uu.nl/oai/request</td>\n",
" <td>OpenDOAR_101</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" OpenDOAR_system_metadata.id \\\n",
"0 175 \n",
"1 64 \n",
"2 151 \n",
"3 105 \n",
"4 101 \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 {\"name\": \"hku theses online\", \"language\": \"en\"} \n",
"1 {\"name\": \"research support scheme - central eu... \n",
"2 {\"name\": \"cadmus, eui research repository\", \"l... \n",
"3 {\"name\": \"document server@uhasselt\", \"language... \n",
"4 {\"name\": \"utrecht university repository\", \"lan... \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 [] \n",
"1 [] \n",
"2 [] \n",
"3 [] \n",
"4 [] \n",
"\n",
" OpenDOAR_repository_metadata.url \\\n",
"0 http://hub.hku.hk/handle/10722/1057 \n",
"1 http://rss.archives.ceu.hu/ \n",
"2 http://cadmus.eui.eu/ \n",
"3 https://doclib.uhasselt.be/dspace/ \n",
"4 http://dspace.library.uu.nl \n",
"\n",
" OpenDOAR_repository_metadata.description \\\n",
"0 this is an institutional repository providing ... \n",
"1 this is an institutional repository collecting... \n",
"2 cadmus is the name of the eui research reposit... \n",
"3 this site is a university repository providing... \n",
"4 this site is a university repository providing... \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 institutional \n",
"1 institutional \n",
"2 institutional \n",
"3 institutional \n",
"4 institutional \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 [\"zh\", \"en\"] \n",
"1 [\"cs\", \"en\", \"hu\", \"ru\"] \n",
"2 [\"nl\", \"en\", \"fr\", \"de\", \"it\"] \n",
"3 [\"nl\", \"en\", \"fr\", \"de\"] \n",
"4 [\"nl\", \"en\"] \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 2021-03-25 10:16:18 \n",
"1 2021-03-25 09:48:31 \n",
"2 2021-09-13 13:35:36 \n",
"3 2021-04-16 15:23:52 \n",
"4 2021-04-16 15:22:03 \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 2005-12-21 12:44:08 \n",
"1 2006-01-04 14:59:30 \n",
"2 2006-01-04 12:07:07 \n",
"3 2006-01-24 15:46:44 \n",
"4 2006-01-13 12:55:13 \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [\"multidisciplinary\"] \n",
"1 [\"multidisciplinary\"] \n",
"2 [\"history and archaeology\", \"multidisciplinary... \n",
"3 [\"multidisciplinary\"] \n",
"4 [\"multidisciplinary\"] \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [\"bibliographic_references\", \"theses_and_disse... \n",
"1 [\"unpub_reports_and_working_papers\"] \n",
"2 [\"journal_articles\", \"theses_and_dissertations... \n",
"3 [\"journal_articles\", \"conference_and_workshop_... \n",
"4 [\"journal_articles\", \"conference_and_workshop_... \n",
"\n",
" OpenDOAR_organization \\\n",
"0 [{\"name\": \"university of hong kong\", \"alternat... \n",
"1 [{\"name\": \"central european university\", \"alte... \n",
"2 [{\"name\": \"european university institute\", \"al... \n",
"3 [{\"name\": \"uhasselt\", \"alternativeName\": \"hass... \n",
"4 [{\"name\": \"university of utrecht\", \"alternativ... \n",
"\n",
" OpenDOAR_policy_urls \\\n",
"0 [] \n",
"1 [] \n",
"2 [{\"policy_url\": \"https://www.eui.eu/research/e... \n",
"3 [] \n",
"4 [] \n",
"\n",
" OpenDOAR_repository_metadata.software \\\n",
"0 {\"name\": \"dspace\", \"version\": \"cris-5.3.1-snap... \n",
"1 {\"name\": \"eprints\", \"version\": \"2.2.1\"} \n",
"2 {\"name\": \"dspace\", \"version\": \"5.2\"} \n",
"3 {\"name\": \"dspace\", \"version\": \"1.7.2\"} \n",
"4 {\"name\": \"dspace\", \"version\": \"\"} \n",
"\n",
" OpenDOAR_repository_metadata.oai_url OpenDOAR_unique_id \n",
"0 NaN OpenDOAR_175 \n",
"1 http://rss.archives.ceu.hu/perl/oai2 OpenDOAR_64 \n",
"2 http://cadmus.eui.eu/oai/request OpenDOAR_151 \n",
"3 http://doclib.uhasselt.be/dspace-oai/request OpenDOAR_105 \n",
"4 https://dspace.library.uu.nl/oai/request OpenDOAR_101 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the OpenDOAR export.\n",
"# No converters are passed on purpose: keys such as 'subject',\n",
"# 'alternativeNames', 'contentType' or 'institution' are not columns of this\n",
"# file, and pandas ignores converter keys that match no column. The list- and\n",
"# dict-valued columns (e.g. 'repository_metadata.content_types',\n",
"# 'organization') therefore stay JSON-encoded strings in this frame.\n",
"opendoar_df = (\n",
"    pd.read_csv(\n",
"        '../data/raw/openDoar.tsv',\n",
"        delimiter='\\t',\n",
"        # Read the id as text so it can be concatenated into unique_id.\n",
"        dtype={'system_metadata.id': str},\n",
"    )\n",
"    # Registry-qualified key, e.g. 'OpenDOAR_175'.\n",
"    .assign(unique_id=lambda frame: 'OpenDOAR_' + frame['system_metadata.id'])\n",
"    # Prefix every column name (including unique_id) with 'OpenDOAR_'.\n",
"    .add_prefix('OpenDOAR_')\n",
")\n",
"opendoar_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>633</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/01</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:07</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/</td>\n",
" <td>@RCHIVESIC</td>\n",
" <td>http://archivesic.ccsd.cnrs.fr/oai/oai.php</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>fr</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>hal</td>\n",
" <td>geoname_2_FR</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2002-05-17 19:24:41</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>25</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[58, 669]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>10</td>\n",
" <td>511</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/00/10</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-18 05:40:13</td>\n",
" <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://www.diva-portal.org/mdh/</td>\n",
" <td>Academic Archive On-line (Mälardalen Universit...</td>\n",
" <td>http://www.diva-portal.org/oai/mdh/OAI</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>se</td>\n",
" <td>Uppsala</td>\n",
" <td>59.8667</td>\n",
" <td>17.6333</td>\n",
" <td>diva</td>\n",
" <td>geoname_2_SE</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2005-12-08 13:15:22</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[258, 526]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1000</td>\n",
" <td>274</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/10/00</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>2011-07-06 08:21:21</td>\n",
" <td>2010-01-06 13:45:01</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://pam.pisharp.org/</td>\n",
" <td>PAM - Portuguese Archive of Mathematics</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>pt</td>\n",
" <td>Bellevue, WA</td>\n",
" <td>47.6034</td>\n",
" <td>-122.155</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_PT</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2006-05-04 10:48:14</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_1000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>10001</td>\n",
" <td>20</td>\n",
" <td>archive</td>\n",
" <td>91</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>2016-03-21 19:44:01</td>\n",
" <td>2015-08-08 14:52:11</td>\n",
" <td>subject</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/</td>\n",
" <td>Klimawandel Dokumentenserver</td>\n",
" <td>http://edoc.sub.uni-hamburg.de/klimawandel/oai</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The \"Documentenserver Klimawandel\" (Repository...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>[KLIMZUG projects, Climate Service Center 2.0,...</td>\n",
" <td>[http://www.hzg.de/, http://www.climateservice...</td>\n",
" <td>de</td>\n",
" <td>Hamburg</td>\n",
" <td>53.5511</td>\n",
" <td>9.9937</td>\n",
" <td>opus</td>\n",
" <td>geoname_2_DE</td>\n",
" <td>other</td>\n",
" <td>[GF, G1, S1, HD, GE]</td>\n",
" <td>2015-07-02 08:08:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[3408, 5881]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_10001</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>10008</td>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>404</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/08</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>2016-03-21 19:43:51</td>\n",
" <td>2015-08-08 14:52:26</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/</td>\n",
" <td>Creative Matter | Skidmore College Research</td>\n",
" <td>http://creativematter.skidmore.edu/do/oai/</td>\n",
" <td>NaN</td>\n",
" <td>http://creativematter.skidmore.edu/recent.rss</td>\n",
" <td>NaN</td>\n",
" <td>Welcome to Creative Matter, a repository for t...</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>Skidmore College</td>\n",
" <td>http://www.skidmore.edu/</td>\n",
" <td>us</td>\n",
" <td>Saratoga Springs</td>\n",
" <td>43.0961</td>\n",
" <td>-73.7818</td>\n",
" <td>bepress</td>\n",
" <td>geoname_2_US</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2015-07-06 17:35:50</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>5882</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_10008</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n",
"0 1 633 archive 1 NaN \n",
"1 10 511 archive 1 NaN \n",
"2 1000 274 archive 1 NaN \n",
"3 10001 20 archive 91 NaN \n",
"4 10008 11 archive 404 NaN \n",
"\n",
" roar_source roar_dir roar_datestamp roar_lastmod \\\n",
"0 NaN disk0/00/00/00/01 2010-01-06 13:43:48 2011-07-18 05:40:07 \n",
"1 NaN disk0/00/00/00/10 2010-01-06 13:43:48 2011-07-18 05:40:13 \n",
"2 NaN disk0/00/00/10/00 2010-01-06 13:45:01 2011-07-06 08:21:21 \n",
"3 NaN disk0/00/01/00/01 2015-08-08 14:52:11 2016-03-21 19:44:01 \n",
"4 NaN disk0/00/01/00/08 2015-08-08 14:52:26 2016-03-21 19:43:51 \n",
"\n",
" roar_status_changed roar_type roar_succeeds roar_commentary \\\n",
"0 2010-01-06 13:43:48 subject NaN NaN \n",
"1 2010-01-06 13:43:48 institutional NaN NaN \n",
"2 2010-01-06 13:45:01 subject NaN NaN \n",
"3 2015-08-08 14:52:11 subject NaN NaN \n",
"4 2015-08-08 14:52:26 institutional NaN NaN \n",
"\n",
" roar_metadata_visibility roar_latitude roar_longitude roar_relation_type \\\n",
"0 show NaN NaN NaN \n",
"1 show NaN NaN NaN \n",
"2 show NaN NaN NaN \n",
"3 show NaN NaN NaN \n",
"4 show NaN NaN NaN \n",
"\n",
" roar_relation_uri roar_item_issues_id roar_item_issues_type \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_item_issues_description roar_item_issues_timestamp \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_status roar_item_issues_reported_by \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_resolved_by roar_item_issues_comment \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_count roar_sword_depositor roar_sword_slug roar_exemplar \\\n",
"0 0 NaN NaN NaN \n",
"1 0 NaN NaN NaN \n",
"2 0 NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_home_page \\\n",
"0 http://archivesic.ccsd.cnrs.fr/ \n",
"1 http://www.diva-portal.org/mdh/ \n",
"2 http://pam.pisharp.org/ \n",
"3 http://edoc.sub.uni-hamburg.de/klimawandel/ \n",
"4 http://creativematter.skidmore.edu/ \n",
"\n",
" roar_title \\\n",
"0 @RCHIVESIC \n",
"1 Academic Archive On-line (Mälardalen Universit... \n",
"2 PAM - Portuguese Archive of Mathematics \n",
"3 Klimawandel Dokumentenserver \n",
"4 Creative Matter | Skidmore College Research \n",
"\n",
" roar_oai_pmh roar_sword_endpoint \\\n",
"0 http://archivesic.ccsd.cnrs.fr/oai/oai.php NaN \n",
"1 http://www.diva-portal.org/oai/mdh/OAI NaN \n",
"2 NaN NaN \n",
"3 http://edoc.sub.uni-hamburg.de/klimawandel/oai NaN \n",
"4 http://creativematter.skidmore.edu/do/oai/ NaN \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 http://creativematter.skidmore.edu/recent.rss NaN \n",
"\n",
" roar_description roar_fulltext \\\n",
"0 NaN NaN \n",
"1 NaN TRUE \n",
"2 NaN TRUE \n",
"3 The \"Documentenserver Klimawandel\" (Repository... TRUE \n",
"4 Welcome to Creative Matter, a repository for t... TRUE \n",
"\n",
" roar_open_access roar_mandate \\\n",
"0 NaN NaN \n",
"1 TRUE NaN \n",
"2 TRUE NaN \n",
"3 TRUE TRUE \n",
"4 FALSE FALSE \n",
"\n",
" roar_organisation_title \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 [KLIMZUG projects, Climate Service Center 2.0,... \n",
"4 Skidmore College \n",
"\n",
" roar_organisation_home_page roar_location_country \\\n",
"0 NaN fr \n",
"1 NaN se \n",
"2 NaN pt \n",
"3 [http://www.hzg.de/, http://www.climateservice... de \n",
"4 http://www.skidmore.edu/ us \n",
"\n",
" roar_location_city roar_location_latitude roar_location_longitude \\\n",
"0 NaN NaN NaN \n",
"1 Uppsala 59.8667 17.6333 \n",
"2 Bellevue, WA 47.6034 -122.155 \n",
"3 Hamburg 53.5511 9.9937 \n",
"4 Saratoga Springs 43.0961 -73.7818 \n",
"\n",
" roar_software roar_geoname roar_version roar_subjects \\\n",
"0 hal geoname_2_FR other NaN \n",
"1 diva geoname_2_SE other NaN \n",
"2 dspace geoname_2_PT other NaN \n",
"3 opus geoname_2_DE other [GF, G1, S1, HD, GE] \n",
"4 bepress geoname_2_US other NaN \n",
"\n",
" roar_date roar_note roar_suggestions roar_activity_low \\\n",
"0 2002-05-17 19:24:41 NaN NaN 0 \n",
"1 2005-12-08 13:15:22 NaN NaN 0 \n",
"2 2006-05-04 10:48:14 NaN NaN NaN \n",
"3 2015-07-02 08:08:31 NaN NaN NaN \n",
"4 2015-07-06 17:35:50 NaN NaN NaN \n",
"\n",
" roar_activity_medium roar_activity_high roar_recordcount \\\n",
"0 0 0 25 \n",
"1 0 0 100 \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_recordhistory roar_fulltexts_total \\\n",
"0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... NaN \n",
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,100,100... NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_fulltexts_docs roar_fulltexts_rtotal roar_fulltexts_rdocs \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_registry_name roar_registry_id roar_submit_to \\\n",
"0 [opendoar, celestial] [58, 669] NaN \n",
"1 [opendoar, celestial] [258, 526] NaN \n",
"2 NaN NaN NaN \n",
"3 [opendoar, celestial] [3408, 5881] NaN \n",
"4 celestial 5882 NaN \n",
"\n",
" roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_webometrics_size roar_webometrics_visibility \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_webometrics_rich_files roar_webometrics_scholar roar_monthly_deposits \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_total_deposits roar_association roar_unique_id \n",
"0 NaN NaN roar_1 \n",
"1 NaN NaN roar_10 \n",
"2 NaN NaN roar_1000 \n",
"3 NaN NaN roar_10001 \n",
"4 NaN NaN roar_10008 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df = pd.read_csv('../data/raw/export_roar_CSV.csv', dtype='str')\n",
"roar_df = roar_df.groupby('eprintid').aggregate(set)\n",
"\n",
"def value_or_list(cell_set):\n",
" copy = set(cell_set)\n",
" copy.discard(np.nan) \n",
" if len(copy) == 0:\n",
" return np.nan\n",
" if len(copy) == 1:\n",
" return copy.pop()\n",
" return list(copy)\n",
" \n",
"roar_df = roar_df.applymap(value_or_list)\n",
"roar_df.reset_index(inplace=True)\n",
"\n",
"roar_df['unique_id'] = 'roar_' + roar_df.eprintid\n",
"roar_df = roar_df.add_prefix('roar_')\n",
"roar_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>10013</td>\n",
" <td>31</td>\n",
" <td>archive</td>\n",
" <td>7104</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/00/13</td>\n",
" <td>2015-08-08 14:53:04</td>\n",
" <td>2016-03-21 19:54:43</td>\n",
" <td>2015-08-08 14:53:04</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://er.ucu.edu.ua/</td>\n",
" <td>ErUCU: Electronic repository of the Ukrainian ...</td>\n",
" <td>http://er.ucu.edu.ua/oai/request</td>\n",
" <td>http://er.ucu.edu.ua/sword/</td>\n",
" <td>http://er.ucu.edu.ua/feed/rss_2.0/site</td>\n",
" <td>NaN</td>\n",
" <td>Ukrainian Catholic Universitys institutional ...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>Ukrainian Catholic University</td>\n",
" <td>http://ucu.edu.ua/eng/</td>\n",
" <td>ua</td>\n",
" <td>Lviv</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_UA</td>\n",
" <td>other</td>\n",
" <td>[BF, H1, BR, AC, BL, L1, D204, B1, D1, BS, HM,...</td>\n",
" <td>2015-07-07 12:38:37</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[3410, 5883]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[russell_group, ivy_league]</td>\n",
" <td>roar_10013</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n",
"7 10013 31 archive 7104 NaN \n",
"\n",
" roar_source roar_dir roar_datestamp roar_lastmod \\\n",
"7 NaN disk0/00/01/00/13 2015-08-08 14:53:04 2016-03-21 19:54:43 \n",
"\n",
" roar_status_changed roar_type roar_succeeds roar_commentary \\\n",
"7 2015-08-08 14:53:04 institutional NaN NaN \n",
"\n",
" roar_metadata_visibility roar_latitude roar_longitude roar_relation_type \\\n",
"7 show NaN NaN NaN \n",
"\n",
" roar_relation_uri roar_item_issues_id roar_item_issues_type \\\n",
"7 NaN NaN NaN \n",
"\n",
" roar_item_issues_description roar_item_issues_timestamp \\\n",
"7 NaN NaN \n",
"\n",
" roar_item_issues_status roar_item_issues_reported_by \\\n",
"7 NaN NaN \n",
"\n",
" roar_item_issues_resolved_by roar_item_issues_comment \\\n",
"7 NaN NaN \n",
"\n",
" roar_item_issues_count roar_sword_depositor roar_sword_slug roar_exemplar \\\n",
"7 NaN NaN NaN NaN \n",
"\n",
" roar_home_page roar_title \\\n",
"7 http://er.ucu.edu.ua/ ErUCU: Electronic repository of the Ukrainian ... \n",
"\n",
" roar_oai_pmh roar_sword_endpoint \\\n",
"7 http://er.ucu.edu.ua/oai/request http://er.ucu.edu.ua/sword/ \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"7 http://er.ucu.edu.ua/feed/rss_2.0/site NaN \n",
"\n",
" roar_description roar_fulltext \\\n",
"7 Ukrainian Catholic Universitys institutional ... TRUE \n",
"\n",
" roar_open_access roar_mandate roar_organisation_title \\\n",
"7 TRUE TRUE Ukrainian Catholic University \n",
"\n",
" roar_organisation_home_page roar_location_country roar_location_city \\\n",
"7 http://ucu.edu.ua/eng/ ua Lviv \n",
"\n",
" roar_location_latitude roar_location_longitude roar_software roar_geoname \\\n",
"7 NaN NaN dspace geoname_2_UA \n",
"\n",
" roar_version roar_subjects \\\n",
"7 other [BF, H1, BR, AC, BL, L1, D204, B1, D1, BS, HM,... \n",
"\n",
" roar_date roar_note roar_suggestions roar_activity_low \\\n",
"7 2015-07-07 12:38:37 NaN NaN NaN \n",
"\n",
" roar_activity_medium roar_activity_high roar_recordcount roar_recordhistory \\\n",
"7 NaN NaN NaN NaN \n",
"\n",
" roar_fulltexts_total roar_fulltexts_docs roar_fulltexts_rtotal \\\n",
"7 NaN NaN NaN \n",
"\n",
" roar_fulltexts_rdocs roar_registry_name roar_registry_id roar_submit_to \\\n",
"7 NaN [opendoar, celestial] [3410, 5883] NaN \n",
"\n",
" roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n",
"7 NaN NaN NaN \n",
"\n",
" roar_webometrics_size roar_webometrics_visibility \\\n",
"7 NaN NaN \n",
"\n",
" roar_webometrics_rich_files roar_webometrics_scholar roar_monthly_deposits \\\n",
"7 NaN NaN NaN \n",
"\n",
" roar_total_deposits roar_association roar_unique_id \n",
"7 NaN [russell_group, ivy_league] roar_10013 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df[roar_df.roar_eprintid == '10013']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Loading dedup results"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::860320be12a1c050cd7731794e231bd3</td>\n",
" <td>opendoar____::2290a7385ed77cc5592dc2153229f082</td>\n",
" <td>1064</td>\n",
" <td>oxford university research archive</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::1aa7a8773e6a7fdacbcedf9999009a38</td>\n",
" <td>opendoar____::191f8f858acda435ae0daf994e2a72c2</td>\n",
" <td>8648</td>\n",
" <td>digital commons@georgia southern</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_8648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::31bceb0c3e2a260593e1e36655ebcee4</td>\n",
" <td>opendoar____::d5776aeecb3c45ab15adce6f5cb355f3</td>\n",
" <td>9713</td>\n",
" <td>materials data repository</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_9713</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::e37b08dd3015330dcbb5d6663667b8b8</td>\n",
" <td>opendoar____::18997733ec258a9fcaf239cc55d53363</td>\n",
" <td>427</td>\n",
" <td>digital repository at the university of maryland</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_427</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::2841194266115ac1cc04d19630cde46b</td>\n",
" <td>re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5</td>\n",
" <td>r3d100011189</td>\n",
" <td>PRISM: University of Calgary's Digital Repository</td>\n",
" <td>re3data</td>\n",
" <td>re3data_r3d100011189</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::860320be12a1c050cd7731794e231bd3 \n",
"1 dedup::1aa7a8773e6a7fdacbcedf9999009a38 \n",
"2 dedup::31bceb0c3e2a260593e1e36655ebcee4 \n",
"3 dedup::e37b08dd3015330dcbb5d6663667b8b8 \n",
"4 dedup::2841194266115ac1cc04d19630cde46b \n",
"\n",
" duplicate_id original_id \\\n",
"0 opendoar____::2290a7385ed77cc5592dc2153229f082 1064 \n",
"1 opendoar____::191f8f858acda435ae0daf994e2a72c2 8648 \n",
"2 opendoar____::d5776aeecb3c45ab15adce6f5cb355f3 9713 \n",
"3 opendoar____::18997733ec258a9fcaf239cc55d53363 427 \n",
"4 re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5 r3d100011189 \n",
"\n",
" name source \\\n",
"0 oxford university research archive OpenDOAR \n",
"1 digital commons@georgia southern OpenDOAR \n",
"2 materials data repository OpenDOAR \n",
"3 digital repository at the university of maryland OpenDOAR \n",
"4 PRISM: University of Calgary's Digital Repository re3data \n",
"\n",
" unique_id \n",
"0 OpenDOAR_1064 \n",
"1 OpenDOAR_8648 \n",
"2 OpenDOAR_9713 \n",
"3 OpenDOAR_427 \n",
"4 re3data_r3d100011189 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup = pd.read_csv('../data/interim/fairsharing_dedup.csv', sep=';', quotechar='\"', header=None, names=['dedup_id', 'duplicate_id', 'original_id', 'name', 'source'])\n",
"dup['unique_id'] = dup.source + '_' + dup.original_id\n",
"dup.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>4617</td>\n",
" <td>4617</td>\n",
" <td>4617</td>\n",
" <td>4617</td>\n",
" <td>4617</td>\n",
" <td>4617</td>\n",
" </tr>\n",
" <tr>\n",
" <th>unique</th>\n",
" <td>2191</td>\n",
" <td>4617</td>\n",
" <td>4159</td>\n",
" <td>3968</td>\n",
" <td>4</td>\n",
" <td>4617</td>\n",
" </tr>\n",
" <tr>\n",
" <th>top</th>\n",
" <td>dedup::75e33da9b103b7b91dcd8da0abe1354b</td>\n",
" <td>opendoar____::2290a7385ed77cc5592dc2153229f082</td>\n",
" <td>2399</td>\n",
" <td>UPN JATIM REPOSITORY</td>\n",
" <td>roar</td>\n",
" <td>OpenDOAR_1064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>freq</th>\n",
" <td>5</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>1977</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"count 4617 \n",
"unique 2191 \n",
"top dedup::75e33da9b103b7b91dcd8da0abe1354b \n",
"freq 5 \n",
"\n",
" duplicate_id original_id \\\n",
"count 4617 4617 \n",
"unique 4617 4159 \n",
"top opendoar____::2290a7385ed77cc5592dc2153229f082 2399 \n",
"freq 1 3 \n",
"\n",
" name source unique_id \n",
"count 4617 4617 4617 \n",
"unique 3968 4 4617 \n",
"top UPN JATIM REPOSITORY roar OpenDOAR_1064 \n",
"freq 4 1977 1 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup.describe()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"dup_grouped = dup.groupby('dedup_id').aggregate(list)\n",
"dup_grouped['source_set'] = dup_grouped.source.map(set)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"duplicate_id 6\n",
"original_id 6\n",
"name 6\n",
"source 6\n",
"unique_id 6\n",
"source_set 6\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_grouped[dup_grouped.source_set.str.len() == 4].count()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"duplicate_id 60\n",
"original_id 60\n",
"name 60\n",
"source 60\n",
"unique_id 60\n",
"source_set 60\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_grouped[dup_grouped.source_set.str.len() == 3].count()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"duplicate_id 1986\n",
"original_id 1986\n",
"name 1986\n",
"source 1986\n",
"unique_id 1986\n",
"source_set 1986\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_grouped[dup_grouped.source_set.str.len() == 2].count()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"duplicate_id 139\n",
"original_id 139\n",
"name 139\n",
"source 139\n",
"unique_id 139\n",
"source_set 139\n",
"dtype: int64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_grouped[dup_grouped.source_set.str.len() == 1].count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Isolating duplicates within a registry"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>source_set</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>dedup::000871c1fc726f0b52dc86a4eeb027de</th>\n",
" <td>[4612, 4649]</td>\n",
" <td>[4612, 4649]</td>\n",
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_4612, roar_4649]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::0163cceb20f5ca7b313419c068abd9dc</th>\n",
" <td>[7943, 8003]</td>\n",
" <td>[7943, 8003]</td>\n",
" <td>[EPrints@NIRT Library Welcomes! - EPrints@NITR...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_7943, roar_8003]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::028ee724157b05d04e7bdcf237d12e60</th>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[HSF Brage Open Research Archive, HSF Brage Op...</td>\n",
" <td>[roar, roar, roar]</td>\n",
" <td>[roar_2670, roar_2698, roar_2741]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::03593ce517feac573fdaafa6dcedef61</th>\n",
" <td>[4393, 4394]</td>\n",
" <td>[4393, 4394]</td>\n",
" <td>[Institutional Repository of Kunming Institute...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_4393, roar_4394]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::03e0704b5690a2dee1861dc3ad3316c9</th>\n",
" <td>[1019, 5550]</td>\n",
" <td>[1019, 5550]</td>\n",
" <td>[PolyU Institutional Repository, PolyU Institu...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_1019, roar_5550]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duplicate_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [7943, 8003] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [2670, 2698, 2741] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [4393, 4394] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [1019, 5550] \n",
"\n",
" original_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [7943, 8003] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [2670, 2698, 2741] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [4393, 4394] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [1019, 5550] \n",
"\n",
" name \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [IIT Bombay Institutional Repository, IIT Bomb... \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [EPrints@NIRT Library Welcomes! - EPrints@NITR... \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [HSF Brage Open Research Archive, HSF Brage Op... \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [Institutional Repository of Kunming Institute... \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [PolyU Institutional Repository, PolyU Institu... \n",
"\n",
" source \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [roar, roar] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [roar, roar, roar] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [roar, roar] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [roar, roar] \n",
"\n",
" unique_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar_4612, roar_4649] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [roar_7943, roar_8003] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [roar_2670, roar_2698, roar_2741] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [roar_4393, roar_4394] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [roar_1019, roar_5550] \n",
"\n",
" source_set \n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de {roar} \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc {roar} \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 {roar} \n",
"dedup::03593ce517feac573fdaafa6dcedef61 {roar} \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 {roar} "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_within = dup.groupby('dedup_id').aggregate(list)\n",
"dup_within['source_set'] = dup_within.source.map(set)\n",
"dup_within = dup_within[dup_within.source_set.str.len() == 1]\n",
"dup_within.head()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>source_set</th>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup_id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>dedup::000871c1fc726f0b52dc86a4eeb027de</th>\n",
" <td>[4612, 4649]</td>\n",
" <td>[4612, 4649]</td>\n",
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_4612, roar_4649]</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::0163cceb20f5ca7b313419c068abd9dc</th>\n",
" <td>[7943, 8003]</td>\n",
" <td>[7943, 8003]</td>\n",
" <td>[EPrints@NIRT Library Welcomes! - EPrints@NITR...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_7943, roar_8003]</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::028ee724157b05d04e7bdcf237d12e60</th>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[HSF Brage Open Research Archive, HSF Brage Op...</td>\n",
" <td>[roar, roar, roar]</td>\n",
" <td>[roar_2670, roar_2698, roar_2741]</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::03593ce517feac573fdaafa6dcedef61</th>\n",
" <td>[4393, 4394]</td>\n",
" <td>[4393, 4394]</td>\n",
" <td>[Institutional Repository of Kunming Institute...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_4393, roar_4394]</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" <tr>\n",
" <th>dedup::03e0704b5690a2dee1861dc3ad3316c9</th>\n",
" <td>[1019, 5550]</td>\n",
" <td>[1019, 5550]</td>\n",
" <td>[PolyU Institutional Repository, PolyU Institu...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_1019, roar_5550]</td>\n",
" <td>roar</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duplicate_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [7943, 8003] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [2670, 2698, 2741] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [4393, 4394] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [1019, 5550] \n",
"\n",
" original_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [7943, 8003] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [2670, 2698, 2741] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [4393, 4394] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [1019, 5550] \n",
"\n",
" name \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [IIT Bombay Institutional Repository, IIT Bomb... \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [EPrints@NIRT Library Welcomes! - EPrints@NITR... \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [HSF Brage Open Research Archive, HSF Brage Op... \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [Institutional Repository of Kunming Institute... \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [PolyU Institutional Repository, PolyU Institu... \n",
"\n",
" source \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar, roar] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [roar, roar] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [roar, roar, roar] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [roar, roar] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [roar, roar] \n",
"\n",
" unique_id \\\n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de [roar_4612, roar_4649] \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc [roar_7943, roar_8003] \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 [roar_2670, roar_2698, roar_2741] \n",
"dedup::03593ce517feac573fdaafa6dcedef61 [roar_4393, roar_4394] \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 [roar_1019, roar_5550] \n",
"\n",
" source_set \n",
"dedup_id \n",
"dedup::000871c1fc726f0b52dc86a4eeb027de roar \n",
"dedup::0163cceb20f5ca7b313419c068abd9dc roar \n",
"dedup::028ee724157b05d04e7bdcf237d12e60 roar \n",
"dedup::03593ce517feac573fdaafa6dcedef61 roar \n",
"dedup::03e0704b5690a2dee1861dc3ad3316c9 roar "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_within['source_set'] = dup_within.source_set.map(set.pop)\n",
"dup_within.head()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" </tr>\n",
" <tr>\n",
" <th>source_set</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>OpenDOAR</th>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" <td>16</td>\n",
" </tr>\n",
" <tr>\n",
" <th>re3data</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>roar</th>\n",
" <td>121</td>\n",
" <td>121</td>\n",
" <td>121</td>\n",
" <td>121</td>\n",
" <td>121</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" duplicate_id original_id name source unique_id\n",
"source_set \n",
"OpenDOAR 16 16 16 16 16\n",
"re3data 2 2 2 2 2\n",
"roar 121 121 121 121 121"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_within.groupby('source_set').count()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>dedup::d2ddea18f00665ce8623e36bd4e3c7c5</td>\n",
" <td>8237</td>\n",
" <td>8237</td>\n",
" <td>AIR | Archivio Istituzionale della Ricerca</td>\n",
" <td>roar</td>\n",
" <td>roar_8237</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>dedup::4c5bcfec8584af0d967f1ab10179ca4b</td>\n",
" <td>2820</td>\n",
" <td>2820</td>\n",
" <td>USU Repository: Open Access Repository</td>\n",
" <td>roar</td>\n",
" <td>roar_2820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>dedup::c2ae5cb2426d96ed19a50b0b7d7c8e11</td>\n",
" <td>9487</td>\n",
" <td>9487</td>\n",
" <td>IR at NRF: Home</td>\n",
" <td>roar</td>\n",
" <td>roar_9487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>dedup::1c65cef3dfd1e00c0b03923a1c591db4</td>\n",
" <td>1241</td>\n",
" <td>1241</td>\n",
" <td>Swansea Metropolitan University Repository</td>\n",
" <td>roar</td>\n",
" <td>roar_1241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>dedup::4217ec5d78c4bc4e5bd006783482441f</td>\n",
" <td>15142</td>\n",
" <td>15142</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>roar</td>\n",
" <td>roar_15142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4560</th>\n",
" <td>dedup::fc394e9935fbd62c8aedc372464e1965</td>\n",
" <td>7161</td>\n",
" <td>7161</td>\n",
" <td>Welcome to IR@NPL</td>\n",
" <td>roar</td>\n",
" <td>roar_7161</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4586</th>\n",
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
" <td>4649</td>\n",
" <td>4649</td>\n",
" <td>IIT Bombay Institutional Repository</td>\n",
" <td>roar</td>\n",
" <td>roar_4649</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4587</th>\n",
" <td>dedup::72c288a828485e5b1d4c52910d106734</td>\n",
" <td>16867</td>\n",
" <td>16867</td>\n",
" <td>Chung Shan Medical University Institutional Re...</td>\n",
" <td>roar</td>\n",
" <td>roar_16867</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4598</th>\n",
" <td>dedup::0163cceb20f5ca7b313419c068abd9dc</td>\n",
" <td>8003</td>\n",
" <td>8003</td>\n",
" <td>EPrints@NIRT Library Welcomes! - EPrints@NIRT</td>\n",
" <td>roar</td>\n",
" <td>roar_8003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4608</th>\n",
" <td>dedup::2aeb1a8f8475cef63900be5d0780e872</td>\n",
" <td>15471</td>\n",
" <td>15471</td>\n",
" <td>Repository STIE Nobel Indonesia</td>\n",
" <td>roar</td>\n",
" <td>roar_15471</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>287 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" dedup_id duplicate_id original_id \\\n",
"28 dedup::d2ddea18f00665ce8623e36bd4e3c7c5 8237 8237 \n",
"31 dedup::4c5bcfec8584af0d967f1ab10179ca4b 2820 2820 \n",
"46 dedup::c2ae5cb2426d96ed19a50b0b7d7c8e11 9487 9487 \n",
"53 dedup::1c65cef3dfd1e00c0b03923a1c591db4 1241 1241 \n",
"59 dedup::4217ec5d78c4bc4e5bd006783482441f 15142 15142 \n",
"... ... ... ... \n",
"4560 dedup::fc394e9935fbd62c8aedc372464e1965 7161 7161 \n",
"4586 dedup::000871c1fc726f0b52dc86a4eeb027de 4649 4649 \n",
"4587 dedup::72c288a828485e5b1d4c52910d106734 16867 16867 \n",
"4598 dedup::0163cceb20f5ca7b313419c068abd9dc 8003 8003 \n",
"4608 dedup::2aeb1a8f8475cef63900be5d0780e872 15471 15471 \n",
"\n",
" name source unique_id \n",
"28 AIR | Archivio Istituzionale della Ricerca roar roar_8237 \n",
"31 USU Repository: Open Access Repository roar roar_2820 \n",
"46 IR at NRF: Home roar roar_9487 \n",
"53 Swansea Metropolitan University Repository roar roar_1241 \n",
"59 Repositorio Institucional roar roar_15142 \n",
"... ... ... ... \n",
"4560 Welcome to IR@NPL roar roar_7161 \n",
"4586 IIT Bombay Institutional Repository roar roar_4649 \n",
"4587 Chung Shan Medical University Institutional Re... roar roar_16867 \n",
"4598 EPrints@NIRT Library Welcomes! - EPrints@NIRT roar roar_8003 \n",
"4608 Repository STIE Nobel Indonesia roar roar_15471 \n",
"\n",
"[287 rows x 6 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replace `dup_within` by the rows of `dup` whose dedup_id appears in its index.\n",
"# NOTE: the name is rebound here, and afterwards its index holds row labels rather\n",
"# than dedup_ids, so running this cell a second time would select nothing.\n",
"dup_within = dup.loc[dup['dedup_id'].isin(dup_within.index)]\n",
"dup_within"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Isolating duplicates across registries (hybrid)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>dedup::471c50ad1a156d7256eddfd747d77931</td>\n",
" <td>opendoar____::6351bf9dce654515bf1ddbd6426dfa97</td>\n",
" <td>1996</td>\n",
" <td>ehtc repositorio institucional</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>dedup::69dafe8b58066478aea48f3d0f384820</td>\n",
" <td>2312</td>\n",
" <td>2312</td>\n",
" <td>Göteborgs universitets publikationer - e-publi...</td>\n",
" <td>roar</td>\n",
" <td>roar_2312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>dedup::8f822ac814829da24a7065b8131bdf47</td>\n",
" <td>opendoar____::a34bacf839b923770b2c360eefa26748</td>\n",
" <td>1035</td>\n",
" <td>kitami institute of technology repository</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1035</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>dedup::63a99723ebb3af94d52b474c3b21dbe1</td>\n",
" <td>5779</td>\n",
" <td>5779</td>\n",
" <td>Sanok Digital Library</td>\n",
" <td>roar</td>\n",
" <td>roar_5779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>dedup::82680bfec0fa08346c1b10d30a3e3d4a</td>\n",
" <td>11212</td>\n",
" <td>11212</td>\n",
" <td>Publication Server of the Wuppertal Institute</td>\n",
" <td>roar</td>\n",
" <td>roar_11212</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4601</th>\n",
" <td>dedup::7810ccd41bf26faaa2c4e1f20db70a71</td>\n",
" <td>3172</td>\n",
" <td>3172</td>\n",
" <td>Tesis Electrónicas UACh</td>\n",
" <td>roar</td>\n",
" <td>roar_3172</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4602</th>\n",
" <td>dedup::e655c7716a4b3ea67f48c6322fc42ed6</td>\n",
" <td>opendoar____::52c5189391854c93e8a0e1326e56c14f</td>\n",
" <td>1637</td>\n",
" <td>vtext digital repository</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1637</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4603</th>\n",
" <td>dedup::5ebe5626b9f1cd89fbb9f665a527591f</td>\n",
" <td>16225</td>\n",
" <td>16225</td>\n",
" <td>Necmettin Erbakan University Institutional Rep...</td>\n",
" <td>roar</td>\n",
" <td>roar_16225</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4605</th>\n",
" <td>dedup::ec0bfd000f253eff3acb1043e1c06979</td>\n",
" <td>opendoar____::aa2a77371374094fe9e0bc1de3f94ed9</td>\n",
" <td>1829</td>\n",
" <td>npue ir</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1829</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4610</th>\n",
" <td>dedup::1c7836dbabd12c458d20e3b35633733a</td>\n",
" <td>14616</td>\n",
" <td>14616</td>\n",
" <td>SOAR@USA: Scholarship and Open Access Repository</td>\n",
" <td>roar</td>\n",
" <td>roar_14616</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>440 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"12 dedup::471c50ad1a156d7256eddfd747d77931 \n",
"21 dedup::69dafe8b58066478aea48f3d0f384820 \n",
"26 dedup::8f822ac814829da24a7065b8131bdf47 \n",
"41 dedup::63a99723ebb3af94d52b474c3b21dbe1 \n",
"47 dedup::82680bfec0fa08346c1b10d30a3e3d4a \n",
"... ... \n",
"4601 dedup::7810ccd41bf26faaa2c4e1f20db70a71 \n",
"4602 dedup::e655c7716a4b3ea67f48c6322fc42ed6 \n",
"4603 dedup::5ebe5626b9f1cd89fbb9f665a527591f \n",
"4605 dedup::ec0bfd000f253eff3acb1043e1c06979 \n",
"4610 dedup::1c7836dbabd12c458d20e3b35633733a \n",
"\n",
" duplicate_id original_id \\\n",
"12 opendoar____::6351bf9dce654515bf1ddbd6426dfa97 1996 \n",
"21 2312 2312 \n",
"26 opendoar____::a34bacf839b923770b2c360eefa26748 1035 \n",
"41 5779 5779 \n",
"47 11212 11212 \n",
"... ... ... \n",
"4601 3172 3172 \n",
"4602 opendoar____::52c5189391854c93e8a0e1326e56c14f 1637 \n",
"4603 16225 16225 \n",
"4605 opendoar____::aa2a77371374094fe9e0bc1de3f94ed9 1829 \n",
"4610 14616 14616 \n",
"\n",
" name source \\\n",
"12 ehtc repositorio institucional OpenDOAR \n",
"21 Göteborgs universitets publikationer - e-publi... roar \n",
"26 kitami institute of technology repository OpenDOAR \n",
"41 Sanok Digital Library roar \n",
"47 Publication Server of the Wuppertal Institute roar \n",
"... ... ... \n",
"4601 Tesis Electrónicas UACh roar \n",
"4602 vtext digital repository OpenDOAR \n",
"4603 Necmettin Erbakan University Institutional Rep... roar \n",
"4605 npue ir OpenDOAR \n",
"4610 SOAR@USA: Scholarship and Open Access Repository roar \n",
"\n",
" unique_id \n",
"12 OpenDOAR_1996 \n",
"21 roar_2312 \n",
"26 OpenDOAR_1035 \n",
"41 roar_5779 \n",
"47 roar_11212 \n",
"... ... \n",
"4601 roar_3172 \n",
"4602 OpenDOAR_1637 \n",
"4603 roar_16225 \n",
"4605 OpenDOAR_1829 \n",
"4610 roar_14616 \n",
"\n",
"[440 rows x 6 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collapse every dedup group that is not in `dup_within` to one row per dedup_id,\n",
"# each column holding the list of that group's values.\n",
"dup_across = (\n",
"    dup.loc[~dup['dedup_id'].isin(dup_within['dedup_id'])]\n",
"    .groupby('dedup_id')\n",
"    .agg(list)\n",
")\n",
"dup_across['source_set'] = dup_across['source'].map(set)\n",
"\n",
"# Hybrid groups have fewer distinct sources than members, i.e. at least one\n",
"# source occurs more than once. `dup_across` is left in its grouped form.\n",
"dup_hybrid = dup.loc[\n",
"    dup['dedup_id'].isin(\n",
"        dup_across.loc[\n",
"            dup_across['source_set'].str.len() < dup_across['source'].str.len()\n",
"        ].index\n",
"    )\n",
"]\n",
"dup_hybrid"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Isolating duplicates across registries (pure)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::860320be12a1c050cd7731794e231bd3</td>\n",
" <td>opendoar____::2290a7385ed77cc5592dc2153229f082</td>\n",
" <td>1064</td>\n",
" <td>oxford university research archive</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::1aa7a8773e6a7fdacbcedf9999009a38</td>\n",
" <td>opendoar____::191f8f858acda435ae0daf994e2a72c2</td>\n",
" <td>8648</td>\n",
" <td>digital commons@georgia southern</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_8648</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::31bceb0c3e2a260593e1e36655ebcee4</td>\n",
" <td>opendoar____::d5776aeecb3c45ab15adce6f5cb355f3</td>\n",
" <td>9713</td>\n",
" <td>materials data repository</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_9713</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::e37b08dd3015330dcbb5d6663667b8b8</td>\n",
" <td>opendoar____::18997733ec258a9fcaf239cc55d53363</td>\n",
" <td>427</td>\n",
" <td>digital repository at the university of maryland</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_427</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::2841194266115ac1cc04d19630cde46b</td>\n",
" <td>re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5</td>\n",
" <td>r3d100011189</td>\n",
" <td>PRISM: University of Calgary's Digital Repository</td>\n",
" <td>re3data</td>\n",
" <td>re3data_r3d100011189</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4612</th>\n",
" <td>dedup::5ef0b4eba35ab2d6180b0bca7e46b6f9</td>\n",
" <td>475</td>\n",
" <td>475</td>\n",
" <td>Ecological Restoration Institute - Northern Ar...</td>\n",
" <td>roar</td>\n",
" <td>roar_475</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4613</th>\n",
" <td>dedup::66e8d052ec2230c66bd11ee6b5a0e3c8</td>\n",
" <td>14199</td>\n",
" <td>14199</td>\n",
" <td>Repositori STKIP PGRI Sumenep</td>\n",
" <td>roar</td>\n",
" <td>roar_14199</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4614</th>\n",
" <td>dedup::1216a1bca4361c39d1d77965c5d95ee3</td>\n",
" <td>4960</td>\n",
" <td>4960</td>\n",
" <td>Virtual Archive of Polish Armenians</td>\n",
" <td>roar</td>\n",
" <td>roar_4960</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4615</th>\n",
" <td>dedup::1408358fe6a7f9327dd41a5651ac284c</td>\n",
" <td>13824</td>\n",
" <td>13824</td>\n",
" <td>Digital Commons @ New Jersey Institute of Tech...</td>\n",
" <td>roar</td>\n",
" <td>roar_13824</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4616</th>\n",
" <td>dedup::5cc33dfe7e069a757ca0fcbe6b95c89e</td>\n",
" <td>opendoar____::d8a4e572d866aa45da78418d9d2ff9f9</td>\n",
" <td>4351</td>\n",
" <td>odu digital commons</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_4351</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3890 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::860320be12a1c050cd7731794e231bd3 \n",
"1 dedup::1aa7a8773e6a7fdacbcedf9999009a38 \n",
"2 dedup::31bceb0c3e2a260593e1e36655ebcee4 \n",
"3 dedup::e37b08dd3015330dcbb5d6663667b8b8 \n",
"4 dedup::2841194266115ac1cc04d19630cde46b \n",
"... ... \n",
"4612 dedup::5ef0b4eba35ab2d6180b0bca7e46b6f9 \n",
"4613 dedup::66e8d052ec2230c66bd11ee6b5a0e3c8 \n",
"4614 dedup::1216a1bca4361c39d1d77965c5d95ee3 \n",
"4615 dedup::1408358fe6a7f9327dd41a5651ac284c \n",
"4616 dedup::5cc33dfe7e069a757ca0fcbe6b95c89e \n",
"\n",
" duplicate_id original_id \\\n",
"0 opendoar____::2290a7385ed77cc5592dc2153229f082 1064 \n",
"1 opendoar____::191f8f858acda435ae0daf994e2a72c2 8648 \n",
"2 opendoar____::d5776aeecb3c45ab15adce6f5cb355f3 9713 \n",
"3 opendoar____::18997733ec258a9fcaf239cc55d53363 427 \n",
"4 re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5 r3d100011189 \n",
"... ... ... \n",
"4612 475 475 \n",
"4613 14199 14199 \n",
"4614 4960 4960 \n",
"4615 13824 13824 \n",
"4616 opendoar____::d8a4e572d866aa45da78418d9d2ff9f9 4351 \n",
"\n",
" name source \\\n",
"0 oxford university research archive OpenDOAR \n",
"1 digital commons@georgia southern OpenDOAR \n",
"2 materials data repository OpenDOAR \n",
"3 digital repository at the university of maryland OpenDOAR \n",
"4 PRISM: University of Calgary's Digital Repository re3data \n",
"... ... ... \n",
"4612 Ecological Restoration Institute - Northern Ar... roar \n",
"4613 Repositori STKIP PGRI Sumenep roar \n",
"4614 Virtual Archive of Polish Armenians roar \n",
"4615 Digital Commons @ New Jersey Institute of Tech... roar \n",
"4616 odu digital commons OpenDOAR \n",
"\n",
" unique_id \n",
"0 OpenDOAR_1064 \n",
"1 OpenDOAR_8648 \n",
"2 OpenDOAR_9713 \n",
"3 OpenDOAR_427 \n",
"4 re3data_r3d100011189 \n",
"... ... \n",
"4612 roar_475 \n",
"4613 roar_14199 \n",
"4614 roar_4960 \n",
"4615 roar_13824 \n",
"4616 OpenDOAR_4351 \n",
"\n",
"[3890 rows x 6 columns]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pure cross-registry groups: every member of the group comes from a different source.\n",
"# Everything is derived from `dup` and `dup_within` under fresh names, so the cell can\n",
"# be re-run safely; filtering `dup_across` in place would fail on a second run because\n",
"# the flattened result no longer has a `source_set` column.\n",
"sources_by_group = (\n",
"    dup[~dup.dedup_id.isin(dup_within.dedup_id)]\n",
"    .groupby('dedup_id')['source']\n",
"    .agg(list)\n",
")\n",
"pure_group_ids = sources_by_group.loc[\n",
"    sources_by_group.map(set).str.len() == sources_by_group.str.len()\n",
"].index\n",
"dup_across = dup[dup.dedup_id.isin(pure_group_ids)]\n",
"dup_across"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
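"Double-check that the three partitions (within, hybrid, pure across) together cover every row and every dedup group of `dup` exactly once."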
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dedup_id 4617\n",
"duplicate_id 4617\n",
"original_id 4617\n",
"name 4617\n",
"source 4617\n",
"unique_id 4617\n",
"dtype: int64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reference figures: non-null values per column of the full duplicates table.\n",
"dup.count(axis='index')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dedup_id 4617\n",
"duplicate_id 4617\n",
"original_id 4617\n",
"name 4617\n",
"source 4617\n",
"unique_id 4617\n",
"dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The per-column counts of the three partitions must add up to those of `dup`;\n",
"# assert it instead of comparing two printed outputs by eye.\n",
"partition_counts = dup_across.count() + dup_within.count() + dup_hybrid.count()\n",
"assert partition_counts.equals(dup.count()), 'partitions do not add up to dup'\n",
"partition_counts"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2191"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The dedup-group counts of the three partitions must add up to the group count of\n",
"# `dup`; a mismatch means a group was dropped or split across partitions.\n",
"partition_groups = sum(\n",
"    part.groupby('dedup_id').ngroups\n",
"    for part in (dup_within, dup_across, dup_hybrid)\n",
")\n",
"assert partition_groups == dup.groupby('dedup_id').ngroups, 'dedup groups do not add up to dup'\n",
"partition_groups"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2191"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reference figure: number of distinct dedup groups in the full duplicates table.\n",
"dup['dedup_id'].nunique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Joining information"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::d2ddea18f00665ce8623e36bd4e3c7c5</td>\n",
" <td>8237</td>\n",
" <td>8237</td>\n",
" <td>AIR | Archivio Istituzionale della Ricerca</td>\n",
" <td>roar</td>\n",
" <td>roar_8237</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8237</td>\n",
" <td>17</td>\n",
" <td>archive</td>\n",
" <td>5268</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/82/37</td>\n",
" <td>2014-05-15 11:23:30</td>\n",
" <td>2014-05-19 05:42:47</td>\n",
" <td>2014-05-15 11:23:30</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://air.unimi.it</td>\n",
" <td>AIR | Archivio Istituzionale della Ricerca</td>\n",
" <td>http://air.unimi.it/dspace-oai/request</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>AIR (Archivio Istituzionale della ricerca) is ...</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>TRUE</td>\n",
" <td>Università degli Studi di Milano</td>\n",
" <td>http://www.unimi.it</td>\n",
" <td>it</td>\n",
" <td>Milan</td>\n",
" <td>45.46</td>\n",
" <td>9.1947</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_IT</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2014-05-04 17:40:53</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>99</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,6...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>celestial</td>\n",
" <td>1596</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_8237</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::4c5bcfec8584af0d967f1ab10179ca4b</td>\n",
" <td>2820</td>\n",
" <td>2820</td>\n",
" <td>USU Repository: Open Access Repository</td>\n",
" <td>roar</td>\n",
" <td>roar_2820</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2820</td>\n",
" <td>525</td>\n",
" <td>archive</td>\n",
" <td>65</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/28/20</td>\n",
" <td>2010-07-29 01:40:27</td>\n",
" <td>2012-01-19 11:37:49</td>\n",
" <td>2010-07-29 01:40:27</td>\n",
" <td>institutional</td>\n",
" <td>2372</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://repository.usu.ac.id</td>\n",
" <td>USU Repository: Open Access Repository</td>\n",
" <td>http://repository.usu.ac.id/oai/request</td>\n",
" <td>NaN</td>\n",
" <td>http://repository.usu.ac.id/feed/rss_2.0/site</td>\n",
" <td>NaN</td>\n",
" <td>Comprises of works by and/or about the univers...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>[USU Library, University of Sumatera Utara]</td>\n",
" <td>[http://www.usu.ac.id, http://library.usu.ac.id]</td>\n",
" <td>id</td>\n",
" <td>Medan</td>\n",
" <td>3.5595</td>\n",
" <td>98.6572</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_ID</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2010-01-15 10:09:25</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>100</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,52,...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[roarmap, opendoar, celestial]</td>\n",
" <td>[1717, 2101, 283]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_2820</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::c2ae5cb2426d96ed19a50b0b7d7c8e11</td>\n",
" <td>9487</td>\n",
" <td>9487</td>\n",
" <td>IR at NRF: Home</td>\n",
" <td>roar</td>\n",
" <td>roar_9487</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9487</td>\n",
" <td>16</td>\n",
" <td>archive</td>\n",
" <td>6458</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/94/87</td>\n",
" <td>2015-05-15 14:03:55</td>\n",
" <td>2016-03-21 20:21:02</td>\n",
" <td>2015-05-15 14:03:55</td>\n",
" <td>multi</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://ir.nrf.ac.za/</td>\n",
" <td>IR at NRF: Home</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>The NRF receives its mandate from the National...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>National Research Foundation of South Africa</td>\n",
" <td>http://www.nrf.ac.za/</td>\n",
" <td>za</td>\n",
" <td>Pretoria</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_ZA</td>\n",
" <td>other</td>\n",
" <td>[AI, AS, B1]</td>\n",
" <td>2015-02-10 06:35:50</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roarmap</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_9487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::1c65cef3dfd1e00c0b03923a1c591db4</td>\n",
" <td>1241</td>\n",
" <td>1241</td>\n",
" <td>Swansea Metropolitan University Repository</td>\n",
" <td>roar</td>\n",
" <td>roar_1241</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1241</td>\n",
" <td>583</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/12/41</td>\n",
" <td>2010-01-06 13:45:32</td>\n",
" <td>2011-07-18 05:57:23</td>\n",
" <td>2010-01-06 13:45:32</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://dspace.smu.ac.uk/dspace/</td>\n",
" <td>Swansea Metropolitan University Repository</td>\n",
" <td>http://dspace.smu.ac.uk/dspace-oai/request</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Users may set up RSS feeds to be alerted to ne...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Swansea Metropolitan University</td>\n",
" <td>http://www.smu.ac.uk/</td>\n",
" <td>gb</td>\n",
" <td>Swansea</td>\n",
" <td>51.6144</td>\n",
" <td>-3.8727</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_GB</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2008-05-15 11:29:17</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>135</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,135,13...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[1779, 1627]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_1241</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::4217ec5d78c4bc4e5bd006783482441f</td>\n",
" <td>15142</td>\n",
" <td>15142</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>roar</td>\n",
" <td>roar_15142</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>15142</td>\n",
" <td>11</td>\n",
" <td>archive</td>\n",
" <td>12132</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/51/42</td>\n",
" <td>2020-08-08 12:35:50</td>\n",
" <td>2021-01-25 22:45:10</td>\n",
" <td>2020-08-08 12:35:50</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://repositorio.undar.edu.pe/</td>\n",
" <td>Repositorio Institucional</td>\n",
" <td>http://repositorio.undar.edu.pe/</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>pe</td>\n",
" <td>huanuco</td>\n",
" <td>-9.9269</td>\n",
" <td>-76.2396</td>\n",
" <td>dspace</td>\n",
" <td>geoname_2_PE</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2019-09-02 21:20:31</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>opendoar</td>\n",
" <td>http://v2.sherpa.ac.uk/id/repository/4422</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_15142</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id duplicate_id original_id \\\n",
"0 dedup::d2ddea18f00665ce8623e36bd4e3c7c5 8237 8237 \n",
"1 dedup::4c5bcfec8584af0d967f1ab10179ca4b 2820 2820 \n",
"2 dedup::c2ae5cb2426d96ed19a50b0b7d7c8e11 9487 9487 \n",
"3 dedup::1c65cef3dfd1e00c0b03923a1c591db4 1241 1241 \n",
"4 dedup::4217ec5d78c4bc4e5bd006783482441f 15142 15142 \n",
"\n",
" name source unique_id \\\n",
"0 AIR | Archivio Istituzionale della Ricerca roar roar_8237 \n",
"1 USU Repository: Open Access Repository roar roar_2820 \n",
"2 IR at NRF: Home roar roar_9487 \n",
"3 Swansea Metropolitan University Repository roar roar_1241 \n",
"4 Repositorio Institucional roar roar_15142 \n",
"\n",
" FAIRsharing_id FAIRsharing_type FAIRsharing_attributes.created-at \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" FAIRsharing_attributes.updated-at FAIRsharing_attributes.metadata.doi \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.metadata.name FAIRsharing_attributes.metadata.status \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.record-type FAIRsharing_attributes.subjects \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.domains FAIRsharing_attributes.taxonomies \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.user-defined-tags FAIRsharing_attributes.countries \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.name FAIRsharing_attributes.abbreviation \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.url FAIRsharing_attributes.doi \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.description FAIRsharing_attributes.publications \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_repositoryName.language re3data_additionalName \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_repositoryURL re3data_repositoryIdentifier \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_repositoryContact re3data_description re3data_description.language \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_type re3data_size re3data_startDate re3data_endDate \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" re3data_repositoryLanguage re3data_subject re3data_missionStatementURL \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_contentType re3data_providerType re3data_keyword \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_institution re3data_policy re3data_databaseAccess \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_databaseLicense re3data_dataAccess re3data_dataLicense \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_dataUploadType re3data_dataUploadLicense re3data_software \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_versioning re3data_api re3data_pidSystem \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_qualityManagement re3data_certificate re3data_metadataStandard \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_syndication re3data_remarks re3data_entryDate re3data_lastUpdate \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" re3data_unique_id OpenDOAR_system_metadata.id \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.url OpenDOAR_repository_metadata.description \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_types OpenDOAR_organization \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" OpenDOAR_policy_urls OpenDOAR_repository_metadata.software \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" OpenDOAR_repository_metadata.oai_url OpenDOAR_unique_id roar_eprintid \\\n",
"0 NaN NaN 8237 \n",
"1 NaN NaN 2820 \n",
"2 NaN NaN 9487 \n",
"3 NaN NaN 1241 \n",
"4 NaN NaN 15142 \n",
"\n",
" roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n",
"0 17 archive 5268 NaN NaN \n",
"1 525 archive 65 NaN NaN \n",
"2 16 archive 6458 NaN NaN \n",
"3 583 archive 1 NaN NaN \n",
"4 11 archive 12132 NaN NaN \n",
"\n",
" roar_dir roar_datestamp roar_lastmod \\\n",
"0 disk0/00/00/82/37 2014-05-15 11:23:30 2014-05-19 05:42:47 \n",
"1 disk0/00/00/28/20 2010-07-29 01:40:27 2012-01-19 11:37:49 \n",
"2 disk0/00/00/94/87 2015-05-15 14:03:55 2016-03-21 20:21:02 \n",
"3 disk0/00/00/12/41 2010-01-06 13:45:32 2011-07-18 05:57:23 \n",
"4 disk0/00/01/51/42 2020-08-08 12:35:50 2021-01-25 22:45:10 \n",
"\n",
" roar_status_changed roar_type roar_succeeds roar_commentary \\\n",
"0 2014-05-15 11:23:30 institutional NaN NaN \n",
"1 2010-07-29 01:40:27 institutional 2372 NaN \n",
"2 2015-05-15 14:03:55 multi NaN NaN \n",
"3 2010-01-06 13:45:32 institutional NaN NaN \n",
"4 2020-08-08 12:35:50 institutional NaN NaN \n",
"\n",
" roar_metadata_visibility roar_latitude roar_longitude roar_relation_type \\\n",
"0 show NaN NaN NaN \n",
"1 show NaN NaN NaN \n",
"2 show NaN NaN NaN \n",
"3 show NaN NaN NaN \n",
"4 show NaN NaN NaN \n",
"\n",
" roar_relation_uri roar_item_issues_id roar_item_issues_type \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_item_issues_description roar_item_issues_timestamp \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_status roar_item_issues_reported_by \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_resolved_by roar_item_issues_comment \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_count roar_sword_depositor roar_sword_slug roar_exemplar \\\n",
"0 NaN NaN NaN NaN \n",
"1 0 NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 0 NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_home_page \\\n",
"0 http://air.unimi.it \n",
"1 http://repository.usu.ac.id \n",
"2 http://ir.nrf.ac.za/ \n",
"3 http://dspace.smu.ac.uk/dspace/ \n",
"4 http://repositorio.undar.edu.pe/ \n",
"\n",
" roar_title \\\n",
"0 AIR | Archivio Istituzionale della Ricerca \n",
"1 USU Repository: Open Access Repository \n",
"2 IR at NRF: Home \n",
"3 Swansea Metropolitan University Repository \n",
"4 Repositorio Institucional \n",
"\n",
" roar_oai_pmh roar_sword_endpoint \\\n",
"0 http://air.unimi.it/dspace-oai/request NaN \n",
"1 http://repository.usu.ac.id/oai/request NaN \n",
"2 NaN NaN \n",
"3 http://dspace.smu.ac.uk/dspace-oai/request NaN \n",
"4 http://repositorio.undar.edu.pe/ NaN \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"0 NaN NaN \n",
"1 http://repository.usu.ac.id/feed/rss_2.0/site NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_description roar_fulltext \\\n",
"0 AIR (Archivio Istituzionale della ricerca) is ... FALSE \n",
"1 Comprises of works by and/or about the univers... TRUE \n",
"2 The NRF receives its mandate from the National... TRUE \n",
"3 Users may set up RSS feeds to be alerted to ne... NaN \n",
"4 NaN FALSE \n",
"\n",
" roar_open_access roar_mandate roar_organisation_title \\\n",
"0 FALSE TRUE Università degli Studi di Milano \n",
"1 TRUE FALSE [USU Library, University of Sumatera Utara] \n",
"2 TRUE FALSE National Research Foundation of South Africa \n",
"3 NaN NaN Swansea Metropolitan University \n",
"4 FALSE FALSE NaN \n",
"\n",
" roar_organisation_home_page roar_location_country \\\n",
"0 http://www.unimi.it it \n",
"1 [http://www.usu.ac.id, http://library.usu.ac.id] id \n",
"2 http://www.nrf.ac.za/ za \n",
"3 http://www.smu.ac.uk/ gb \n",
"4 NaN pe \n",
"\n",
" roar_location_city roar_location_latitude roar_location_longitude \\\n",
"0 Milan 45.46 9.1947 \n",
"1 Medan 3.5595 98.6572 \n",
"2 Pretoria NaN NaN \n",
"3 Swansea 51.6144 -3.8727 \n",
"4 huanuco -9.9269 -76.2396 \n",
"\n",
" roar_software roar_geoname roar_version roar_subjects roar_date \\\n",
"0 dspace geoname_2_IT other NaN 2014-05-04 17:40:53 \n",
"1 dspace geoname_2_ID other NaN 2010-01-15 10:09:25 \n",
"2 dspace geoname_2_ZA other [AI, AS, B1] 2015-02-10 06:35:50 \n",
"3 dspace geoname_2_GB other NaN 2008-05-15 11:29:17 \n",
"4 dspace geoname_2_PE other NaN 2019-09-02 21:20:31 \n",
"\n",
" roar_note roar_suggestions roar_activity_low roar_activity_medium \\\n",
"0 NaN NaN 0 0 \n",
"1 NaN NaN 0 0 \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN 0 0 \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_activity_high roar_recordcount \\\n",
"0 0 99 \n",
"1 0 100 \n",
"2 NaN NaN \n",
"3 0 135 \n",
"4 NaN NaN \n",
"\n",
" roar_recordhistory roar_fulltexts_total \\\n",
"0 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,6... NaN \n",
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,51,52,... NaN \n",
"2 NaN NaN \n",
"3 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,135,13... 0 \n",
"4 NaN NaN \n",
"\n",
" roar_fulltexts_docs roar_fulltexts_rtotal roar_fulltexts_rdocs \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 0 0 0 \n",
"4 NaN NaN NaN \n",
"\n",
" roar_registry_name roar_registry_id \\\n",
"0 celestial 1596 \n",
"1 [roarmap, opendoar, celestial] [1717, 2101, 283] \n",
"2 roarmap NaN \n",
"3 [opendoar, celestial] [1779, 1627] \n",
"4 opendoar http://v2.sherpa.ac.uk/id/repository/4422 \n",
"\n",
" roar_submit_to roar_submitted_to_name roar_submitted_to_done \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_webometrics_rank roar_webometrics_size roar_webometrics_visibility \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_webometrics_rich_files roar_webometrics_scholar roar_monthly_deposits \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_total_deposits roar_association roar_unique_id \n",
"0 NaN NaN roar_8237 \n",
"1 NaN NaN roar_2820 \n",
"2 NaN NaN roar_9487 \n",
"3 NaN NaN roar_1241 \n",
"4 NaN NaN roar_15142 "
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Enrich the duplicate records with the full metadata of each registry. Every step is\n",
"# a left join from dup_within.unique_id onto that registry's *_unique_id column, so\n",
"# rows without a match in a registry keep NaN in that registry's columns.\n",
"# dup_within is rebound to the merged result.\n",
"dup_within = (\n",
"    dup_within\n",
"    .merge(fairsharing_df, how='left', left_on='unique_id', right_on='FAIRsharing_unique_id')\n",
"    .merge(re3data_df, how='left', left_on='unique_id', right_on='re3data_unique_id')\n",
"    .merge(opendoar_df, how='left', left_on='unique_id', right_on='OpenDOAR_unique_id')\n",
"    .merge(roar_df, how='left', left_on='unique_id', right_on='roar_unique_id')\n",
")\n",
"dup_within.head()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::471c50ad1a156d7256eddfd747d77931</td>\n",
" <td>opendoar____::6351bf9dce654515bf1ddbd6426dfa97</td>\n",
" <td>1996</td>\n",
" <td>ehtc repositorio institucional</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1996</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1996</td>\n",
" <td>{\"name\": \"ehtc repositorio institucional\", \"la...</td>\n",
" <td>[]</td>\n",
" <td>http://www.repositorio.ehtc.cu/jspui/</td>\n",
" <td>this site provides access to the hospitality a...</td>\n",
" <td>institutional</td>\n",
" <td>[\"es\"]</td>\n",
" <td>2019-10-17 14:34:31</td>\n",
" <td>2010-12-01 11:11:57</td>\n",
" <td>[\"business and economics\", \"education\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"escuela de hoteler\\u00eda y turismo...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"1.6.2\"}</td>\n",
" <td>NaN</td>\n",
" <td>OpenDOAR_1996</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::69dafe8b58066478aea48f3d0f384820</td>\n",
" <td>2312</td>\n",
" <td>2312</td>\n",
" <td>Göteborgs universitets publikationer - e-publi...</td>\n",
" <td>roar</td>\n",
" <td>roar_2312</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2312</td>\n",
" <td>736</td>\n",
" <td>archive</td>\n",
" <td>1</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/23/12</td>\n",
" <td>2010-01-14 12:10:06</td>\n",
" <td>2011-07-18 06:01:08</td>\n",
" <td>2010-01-14 12:10:06</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://gupea.ub.gu.se/dspace/index.jsp</td>\n",
" <td>Göteborgs universitets publikationer - e-publi...</td>\n",
" <td>http://gupea.ub.gu.se/dspace-oai/request</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>This is an institutional repository providing ...</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>FALSE</td>\n",
" <td>Göteborgs Universitet</td>\n",
" <td>http://www.gu.se/</td>\n",
" <td>se</td>\n",
" <td>NaN</td>\n",
" <td>57.6975</td>\n",
" <td>11.9608</td>\n",
" <td>dspace</td>\n",
" <td>NaN</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2005-06-07 12:57:08</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>96</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[1149, 1832]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_2312</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::8f822ac814829da24a7065b8131bdf47</td>\n",
" <td>opendoar____::a34bacf839b923770b2c360eefa26748</td>\n",
" <td>1035</td>\n",
" <td>kitami institute of technology repository</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1035</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1035</td>\n",
" <td>{\"name\": \"kitami institute of technology repos...</td>\n",
" <td>[{\"name\": \"\\u5317\\u898b\\u5de5\\u696d\\u5927\\u5b6...</td>\n",
" <td>https://kitami-it.repo.nii.ac.jp/</td>\n",
" <td>this site is a university repository providing...</td>\n",
" <td>institutional</td>\n",
" <td>[\"ja\", \"en\"]</td>\n",
" <td>2020-09-09 11:57:56</td>\n",
" <td>2007-10-09 09:09:40</td>\n",
" <td>[\"technology general\"]</td>\n",
" <td>[\"journal_articles\", \"unpub_reports_and_workin...</td>\n",
" <td>[{\"name\": \"kitami institute of technology\", \"a...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"weko\", \"version\": \"\"}</td>\n",
" <td>http://kitami-it.repo.nii.ac.jp/oai</td>\n",
" <td>OpenDOAR_1035</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::63a99723ebb3af94d52b474c3b21dbe1</td>\n",
" <td>5779</td>\n",
" <td>5779</td>\n",
" <td>Sanok Digital Library</td>\n",
" <td>roar</td>\n",
" <td>roar_5779</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>5779</td>\n",
" <td>9</td>\n",
" <td>archive</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/00/57/79</td>\n",
" <td>2012-12-12 04:54:20</td>\n",
" <td>2012-12-15 02:36:20</td>\n",
" <td>2012-12-12 04:54:20</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>http://sanockabibliotekacyfrowa.pl/dlibra</td>\n",
" <td>Sanok Digital Library</td>\n",
" <td>http://sanockabibliotekacyfrowa.pl/dlibra/oai-...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>This site provides access to the digitised col...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>Digital-Center</td>\n",
" <td>http://www.digital-center.pl/</td>\n",
" <td>pl</td>\n",
" <td>NaN</td>\n",
" <td>52.4872</td>\n",
" <td>16.8493</td>\n",
" <td>NaN</td>\n",
" <td>geoname_2_PL</td>\n",
" <td>other</td>\n",
" <td>NaN</td>\n",
" <td>2012-08-05 15:12:12</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>19</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,19,19...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[5072, 2545]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_5779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::82680bfec0fa08346c1b10d30a3e3d4a</td>\n",
" <td>11212</td>\n",
" <td>11212</td>\n",
" <td>Publication Server of the Wuppertal Institute</td>\n",
" <td>roar</td>\n",
" <td>roar_11212</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>11212</td>\n",
" <td>12</td>\n",
" <td>archive</td>\n",
" <td>5611</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>disk0/00/01/12/12</td>\n",
" <td>2016-05-04 11:37:14</td>\n",
" <td>2016-05-07 01:37:18</td>\n",
" <td>2016-05-04 11:37:14</td>\n",
" <td>institutional</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>show</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>https://epub.wupperinst.org/home</td>\n",
" <td>Publication Server of the Wuppertal Institute\\...</td>\n",
" <td>https://epub.wupperinst.org/oai</td>\n",
" <td>NaN</td>\n",
" <td>https://epub.wupperinst.org/rss</td>\n",
" <td>NaN</td>\n",
" <td>\\n\\nOn this Publication Server of the Wupperta...</td>\n",
" <td>TRUE</td>\n",
" <td>TRUE</td>\n",
" <td>FALSE</td>\n",
" <td>Wuppertal Institut für Klima, Umwelt, Energie</td>\n",
" <td>http://wupperinst.org/</td>\n",
" <td>de</td>\n",
" <td>Wuppertal</td>\n",
" <td>51.2562</td>\n",
" <td>7.1508</td>\n",
" <td>opus</td>\n",
" <td>geoname_2_DE</td>\n",
" <td>other</td>\n",
" <td>[T1, HB, GE]</td>\n",
" <td>2016-04-28 13:58:38</td>\n",
" <td>NaN</td>\n",
" <td>please delete ID 5891</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n",
" <td>[6112, 2539]</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>roar_11212</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::471c50ad1a156d7256eddfd747d77931 \n",
"1 dedup::69dafe8b58066478aea48f3d0f384820 \n",
"2 dedup::8f822ac814829da24a7065b8131bdf47 \n",
"3 dedup::63a99723ebb3af94d52b474c3b21dbe1 \n",
"4 dedup::82680bfec0fa08346c1b10d30a3e3d4a \n",
"\n",
" duplicate_id original_id \\\n",
"0 opendoar____::6351bf9dce654515bf1ddbd6426dfa97 1996 \n",
"1 2312 2312 \n",
"2 opendoar____::a34bacf839b923770b2c360eefa26748 1035 \n",
"3 5779 5779 \n",
"4 11212 11212 \n",
"\n",
" name source unique_id \\\n",
"0 ehtc repositorio institucional OpenDOAR OpenDOAR_1996 \n",
"1 Göteborgs universitets publikationer - e-publi... roar roar_2312 \n",
"2 kitami institute of technology repository OpenDOAR OpenDOAR_1035 \n",
"3 Sanok Digital Library roar roar_5779 \n",
"4 Publication Server of the Wuppertal Institute roar roar_11212 \n",
"\n",
" FAIRsharing_id FAIRsharing_type FAIRsharing_attributes.created-at \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" FAIRsharing_attributes.updated-at FAIRsharing_attributes.metadata.doi \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.metadata.name FAIRsharing_attributes.metadata.status \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.record-type FAIRsharing_attributes.subjects \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.domains FAIRsharing_attributes.taxonomies \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.user-defined-tags FAIRsharing_attributes.countries \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.name FAIRsharing_attributes.abbreviation \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.url FAIRsharing_attributes.doi \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.description FAIRsharing_attributes.publications \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_repositoryName.language re3data_additionalName \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_repositoryURL re3data_repositoryIdentifier \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_repositoryContact re3data_description re3data_description.language \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_type re3data_size re3data_startDate re3data_endDate \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" re3data_repositoryLanguage re3data_subject re3data_missionStatementURL \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_contentType re3data_providerType re3data_keyword \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_institution re3data_policy re3data_databaseAccess \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_databaseLicense re3data_dataAccess re3data_dataLicense \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_dataUploadType re3data_dataUploadLicense re3data_software \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_versioning re3data_api re3data_pidSystem \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_qualityManagement re3data_certificate re3data_metadataStandard \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" re3data_syndication re3data_remarks re3data_entryDate re3data_lastUpdate \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" re3data_unique_id OpenDOAR_system_metadata.id \\\n",
"0 NaN 1996 \n",
"1 NaN NaN \n",
"2 NaN 1035 \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 {\"name\": \"ehtc repositorio institucional\", \"la... \n",
"1 NaN \n",
"2 {\"name\": \"kitami institute of technology repos... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 [] \n",
"1 NaN \n",
"2 [{\"name\": \"\\u5317\\u898b\\u5de5\\u696d\\u5927\\u5b6... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.url \\\n",
"0 http://www.repositorio.ehtc.cu/jspui/ \n",
"1 NaN \n",
"2 https://kitami-it.repo.nii.ac.jp/ \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.description \\\n",
"0 this site provides access to the hospitality a... \n",
"1 NaN \n",
"2 this site is a university repository providing... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 institutional \n",
"1 NaN \n",
"2 institutional \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 [\"es\"] \n",
"1 NaN \n",
"2 [\"ja\", \"en\"] \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 2019-10-17 14:34:31 \n",
"1 NaN \n",
"2 2020-09-09 11:57:56 \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 2010-12-01 11:11:57 \n",
"1 NaN \n",
"2 2007-10-09 09:09:40 \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [\"business and economics\", \"education\"] \n",
"1 NaN \n",
"2 [\"technology general\"] \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [\"journal_articles\", \"conference_and_workshop_... \n",
"1 NaN \n",
"2 [\"journal_articles\", \"unpub_reports_and_workin... \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_organization OpenDOAR_policy_urls \\\n",
"0 [{\"name\": \"escuela de hoteler\\u00eda y turismo... [] \n",
"1 NaN NaN \n",
"2 [{\"name\": \"kitami institute of technology\", \"a... [] \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" OpenDOAR_repository_metadata.software \\\n",
"0 {\"name\": \"dspace\", \"version\": \"1.6.2\"} \n",
"1 NaN \n",
"2 {\"name\": \"weko\", \"version\": \"\"} \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.oai_url OpenDOAR_unique_id roar_eprintid \\\n",
"0 NaN OpenDOAR_1996 NaN \n",
"1 NaN NaN 2312 \n",
"2 http://kitami-it.repo.nii.ac.jp/oai OpenDOAR_1035 NaN \n",
"3 NaN NaN 5779 \n",
"4 NaN NaN 11212 \n",
"\n",
" roar_rev_number roar_eprint_status roar_userid roar_importid roar_source \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 736 archive 1 NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 9 archive 8 NaN NaN \n",
"4 12 archive 5611 NaN NaN \n",
"\n",
" roar_dir roar_datestamp roar_lastmod \\\n",
"0 NaN NaN NaN \n",
"1 disk0/00/00/23/12 2010-01-14 12:10:06 2011-07-18 06:01:08 \n",
"2 NaN NaN NaN \n",
"3 disk0/00/00/57/79 2012-12-12 04:54:20 2012-12-15 02:36:20 \n",
"4 disk0/00/01/12/12 2016-05-04 11:37:14 2016-05-07 01:37:18 \n",
"\n",
" roar_status_changed roar_type roar_succeeds roar_commentary \\\n",
"0 NaN NaN NaN NaN \n",
"1 2010-01-14 12:10:06 institutional NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 2012-12-12 04:54:20 other NaN NaN \n",
"4 2016-05-04 11:37:14 institutional NaN NaN \n",
"\n",
" roar_metadata_visibility roar_latitude roar_longitude roar_relation_type \\\n",
"0 NaN NaN NaN NaN \n",
"1 show NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 show NaN NaN NaN \n",
"4 show NaN NaN NaN \n",
"\n",
" roar_relation_uri roar_item_issues_id roar_item_issues_type \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_item_issues_description roar_item_issues_timestamp \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_status roar_item_issues_reported_by \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_resolved_by roar_item_issues_comment \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_count roar_sword_depositor roar_sword_slug roar_exemplar \\\n",
"0 NaN NaN NaN NaN \n",
"1 0 NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_home_page \\\n",
"0 NaN \n",
"1 http://gupea.ub.gu.se/dspace/index.jsp \n",
"2 NaN \n",
"3 http://sanockabibliotekacyfrowa.pl/dlibra \n",
"4 https://epub.wupperinst.org/home \n",
"\n",
" roar_title \\\n",
"0 NaN \n",
"1 Göteborgs universitets publikationer - e-publi... \n",
"2 NaN \n",
"3 Sanok Digital Library \n",
"4 Publication Server of the Wuppertal Institute\\... \n",
"\n",
" roar_oai_pmh roar_sword_endpoint \\\n",
"0 NaN NaN \n",
"1 http://gupea.ub.gu.se/dspace-oai/request NaN \n",
"2 NaN NaN \n",
"3 http://sanockabibliotekacyfrowa.pl/dlibra/oai-... NaN \n",
"4 https://epub.wupperinst.org/oai NaN \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 https://epub.wupperinst.org/rss NaN \n",
"\n",
" roar_description roar_fulltext \\\n",
"0 NaN NaN \n",
"1 This is an institutional repository providing ... FALSE \n",
"2 NaN NaN \n",
"3 This site provides access to the digitised col... NaN \n",
"4 \\n\\nOn this Publication Server of the Wupperta... TRUE \n",
"\n",
" roar_open_access roar_mandate \\\n",
"0 NaN NaN \n",
"1 FALSE FALSE \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 TRUE FALSE \n",
"\n",
" roar_organisation_title \\\n",
"0 NaN \n",
"1 Göteborgs Universitet \n",
"2 NaN \n",
"3 Digital-Center \n",
"4 Wuppertal Institut für Klima, Umwelt, Energie \n",
"\n",
" roar_organisation_home_page roar_location_country roar_location_city \\\n",
"0 NaN NaN NaN \n",
"1 http://www.gu.se/ se NaN \n",
"2 NaN NaN NaN \n",
"3 http://www.digital-center.pl/ pl NaN \n",
"4 http://wupperinst.org/ de Wuppertal \n",
"\n",
" roar_location_latitude roar_location_longitude roar_software roar_geoname \\\n",
"0 NaN NaN NaN NaN \n",
"1 57.6975 11.9608 dspace NaN \n",
"2 NaN NaN NaN NaN \n",
"3 52.4872 16.8493 NaN geoname_2_PL \n",
"4 51.2562 7.1508 opus geoname_2_DE \n",
"\n",
" roar_version roar_subjects roar_date roar_note \\\n",
"0 NaN NaN NaN NaN \n",
"1 other NaN 2005-06-07 12:57:08 NaN \n",
"2 NaN NaN NaN NaN \n",
"3 other NaN 2012-08-05 15:12:12 NaN \n",
"4 other [T1, HB, GE] 2016-04-28 13:58:38 NaN \n",
"\n",
" roar_suggestions roar_activity_low roar_activity_medium \\\n",
"0 NaN NaN NaN \n",
"1 NaN 0 0 \n",
"2 NaN NaN NaN \n",
"3 NaN 0 0 \n",
"4 please delete ID 5891 NaN NaN \n",
"\n",
" roar_activity_high roar_recordcount \\\n",
"0 NaN NaN \n",
"1 0 96 \n",
"2 NaN NaN \n",
"3 0 19 \n",
"4 NaN NaN \n",
"\n",
" roar_recordhistory roar_fulltexts_total \\\n",
"0 NaN NaN \n",
"1 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... NaN \n",
"2 NaN NaN \n",
"3 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19,19,19... NaN \n",
"4 NaN NaN \n",
"\n",
" roar_fulltexts_docs roar_fulltexts_rtotal roar_fulltexts_rdocs \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_registry_name roar_registry_id roar_submit_to \\\n",
"0 NaN NaN NaN \n",
"1 [opendoar, celestial] [1149, 1832] NaN \n",
"2 NaN NaN NaN \n",
"3 [opendoar, celestial] [5072, 2545] NaN \n",
"4 [opendoar, celestial] [6112, 2539] NaN \n",
"\n",
" roar_submitted_to_name roar_submitted_to_done roar_webometrics_rank \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_webometrics_size roar_webometrics_visibility \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_webometrics_rich_files roar_webometrics_scholar roar_monthly_deposits \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_total_deposits roar_association roar_unique_id \n",
"0 NaN NaN NaN \n",
"1 NaN NaN roar_2312 \n",
"2 NaN NaN NaN \n",
"3 NaN NaN roar_5779 \n",
"4 NaN NaN roar_11212 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_hybrid = dup_hybrid.merge(fairsharing_df, left_on='unique_id', right_on='FAIRsharing_unique_id', how='left')\n",
"dup_hybrid = dup_hybrid.merge(re3data_df, left_on='unique_id', right_on='re3data_unique_id', how='left')\n",
"dup_hybrid = dup_hybrid.merge(opendoar_df, left_on='unique_id', right_on='OpenDOAR_unique_id', how='left')\n",
"dup_hybrid = dup_hybrid.merge(roar_df, left_on='unique_id', right_on='roar_unique_id', how='left')\n",
"dup_hybrid.head()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::860320be12a1c050cd7731794e231bd3</td>\n",
" <td>opendoar____::2290a7385ed77cc5592dc2153229f082</td>\n",
" <td>1064</td>\n",
" <td>oxford university research archive</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_1064</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1064</td>\n",
" <td>{\"name\": \"oxford university research archive\",...</td>\n",
" <td>[{\"acronym\": \"ora\"}]</td>\n",
" <td>http://ora.ox.ac.uk</td>\n",
" <td>this site provides access to the collected res...</td>\n",
" <td>institutional</td>\n",
" <td>[\"zh\", \"nl\", \"en\", \"fr\", \"de\", \"it\", \"ja\", \"pt...</td>\n",
" <td>2021-09-13 13:35:44</td>\n",
" <td>2007-10-10 16:16:02</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"university of oxford\", \"alternative...</td>\n",
" <td>[{\"policy_url\": \"https://libguides.bodleian.ox...</td>\n",
" <td>{\"name\": \"fedora\", \"version\": \"4.6.2\"}</td>\n",
" <td>https://ora.ox.ac.uk/oai2</td>\n",
" <td>OpenDOAR_1064</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::1aa7a8773e6a7fdacbcedf9999009a38</td>\n",
" <td>opendoar____::191f8f858acda435ae0daf994e2a72c2</td>\n",
" <td>8648</td>\n",
" <td>digital commons@georgia southern</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_8648</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8648</td>\n",
" <td>{\"name\": \"digital commons@georgia southern\", \"...</td>\n",
" <td>[]</td>\n",
" <td>https://digitalcommons.georgiasouthern.edu</td>\n",
" <td>this site provides access to the research outp...</td>\n",
" <td>institutional</td>\n",
" <td>[\"en\"]</td>\n",
" <td>2021-02-18 18:13:34</td>\n",
" <td>2019-09-28 04:24:47</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"georgia southern university\", \"alte...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"digital_commons\", \"version\": \"\"}</td>\n",
" <td>https://digitalcommons.georgiasouthern.edu/do/oai</td>\n",
" <td>OpenDOAR_8648</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::31bceb0c3e2a260593e1e36655ebcee4</td>\n",
" <td>opendoar____::d5776aeecb3c45ab15adce6f5cb355f3</td>\n",
" <td>9713</td>\n",
" <td>materials data repository</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_9713</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>9713</td>\n",
" <td>{\"name\": \"materials data repository\", \"languag...</td>\n",
" <td>[{\"acronym\": \"mdr\"}]</td>\n",
" <td>https://mdr.nims.go.jp</td>\n",
" <td>mdr : materials data repository is a data repo...</td>\n",
" <td>institutional</td>\n",
" <td>[\"en\", \"ja\"]</td>\n",
" <td>2021-05-21 18:04:32</td>\n",
" <td>2020-07-13 10:09:55</td>\n",
" <td>[\"science general\"]</td>\n",
" <td>[\"journal_articles\", \"conference_and_workshop_...</td>\n",
" <td>[{\"name\": \"national institute for materials sc...</td>\n",
" <td>[]</td>\n",
" <td>{\"name\": \"fedora\", \"version\": \"\"}</td>\n",
" <td>https://mdr.nims.go.jp/catalog/oai</td>\n",
" <td>OpenDOAR_9713</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::e37b08dd3015330dcbb5d6663667b8b8</td>\n",
" <td>opendoar____::18997733ec258a9fcaf239cc55d53363</td>\n",
" <td>427</td>\n",
" <td>digital repository at the university of maryland</td>\n",
" <td>OpenDOAR</td>\n",
" <td>OpenDOAR_427</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>427</td>\n",
" <td>{\"name\": \"digital repository at the university...</td>\n",
" <td>[{\"acronym\": \"drum\"}]</td>\n",
" <td>http://drum.lib.umd.edu/</td>\n",
" <td>this site is a university repository providing...</td>\n",
" <td>institutional</td>\n",
" <td>[\"en\"]</td>\n",
" <td>2021-09-13 13:35:39</td>\n",
" <td>2006-08-04 09:09:20</td>\n",
" <td>[\"multidisciplinary\"]</td>\n",
" <td>[\"journal_articles\", \"theses_and_dissertations...</td>\n",
" <td>[{\"name\": \"university of maryland\", \"alternati...</td>\n",
" <td>[{\"policy_url\": \"http://drum.lib.umd.edu/page/...</td>\n",
" <td>{\"name\": \"dspace\", \"version\": \"4.1.0\"}</td>\n",
" <td>http://drum.lib.umd.edu/oai/request</td>\n",
" <td>OpenDOAR_427</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::2841194266115ac1cc04d19630cde46b</td>\n",
" <td>re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5</td>\n",
" <td>r3d100011189</td>\n",
" <td>PRISM: University of Calgary's Digital Repository</td>\n",
" <td>re3data</td>\n",
" <td>re3data_r3d100011189</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>r3d100011189</td>\n",
" <td>PRISM: University of Calgary's Digital Repository</td>\n",
" <td>eng</td>\n",
" <td>[]</td>\n",
" <td>https://prism.ucalgary.ca/</td>\n",
" <td>[OpenDOAR:7771]</td>\n",
" <td>[\"digitize@ucalgary.ca\", \"kmeranji@ucalgary.ca\"]</td>\n",
" <td>PRISM is a digital archive of the University o...</td>\n",
" <td>eng</td>\n",
" <td>[institutional]</td>\n",
" <td>{\"size\": \"\", \"updatedp\": \"\"}</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>[\"eng\"]</td>\n",
" <td>[1 Humanities and Social Sciences, 11 Humaniti...</td>\n",
" <td>NaN</td>\n",
" <td>[Audiovisual data, Images, Standard office doc...</td>\n",
" <td>[dataProvider]</td>\n",
" <td>[multidisciplinary]</td>\n",
" <td>[{'institutionName': 'University of Calgary, L...</td>\n",
" <td>[{\"policyName\": \"Open Access Mandate\", \"policy...</td>\n",
" <td>{\"databaseAccessType\": \"open\", \"databaseAcces...</td>\n",
" <td>[]</td>\n",
" <td>[{\"dataAccessType\": \"open\", \"dataAccessRestric...</td>\n",
" <td>[{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"...</td>\n",
" <td>restricted</td>\n",
" <td>[{\"dataUploadLicenseName\": \"Submission Policy\"...</td>\n",
" <td>[\"DSpace\"]</td>\n",
" <td>NaN</td>\n",
" <td>[]</td>\n",
" <td>[\"DOI\", \"hdl\"]</td>\n",
" <td>NaN</td>\n",
" <td>[]</td>\n",
" <td>no</td>\n",
" <td>yes</td>\n",
" <td>[]</td>\n",
" <td>[]</td>\n",
" <td>{\"syndication\": \"http://prism.ucalgary.ca/feed...</td>\n",
" <td>NaN</td>\n",
" <td>2014-10-20</td>\n",
" <td>2020-01-09</td>\n",
" <td>re3data_r3d100011189</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::860320be12a1c050cd7731794e231bd3 \n",
"1 dedup::1aa7a8773e6a7fdacbcedf9999009a38 \n",
"2 dedup::31bceb0c3e2a260593e1e36655ebcee4 \n",
"3 dedup::e37b08dd3015330dcbb5d6663667b8b8 \n",
"4 dedup::2841194266115ac1cc04d19630cde46b \n",
"\n",
" duplicate_id original_id \\\n",
"0 opendoar____::2290a7385ed77cc5592dc2153229f082 1064 \n",
"1 opendoar____::191f8f858acda435ae0daf994e2a72c2 8648 \n",
"2 opendoar____::d5776aeecb3c45ab15adce6f5cb355f3 9713 \n",
"3 opendoar____::18997733ec258a9fcaf239cc55d53363 427 \n",
"4 re3data_____::3afbb2b45a3dd218a5a091ca773cf6c5 r3d100011189 \n",
"\n",
" name source \\\n",
"0 oxford university research archive OpenDOAR \n",
"1 digital commons@georgia southern OpenDOAR \n",
"2 materials data repository OpenDOAR \n",
"3 digital repository at the university of maryland OpenDOAR \n",
"4 PRISM: University of Calgary's Digital Repository re3data \n",
"\n",
" unique_id FAIRsharing_id FAIRsharing_type \\\n",
"0 OpenDOAR_1064 NaN NaN \n",
"1 OpenDOAR_8648 NaN NaN \n",
"2 OpenDOAR_9713 NaN NaN \n",
"3 OpenDOAR_427 NaN NaN \n",
"4 re3data_r3d100011189 NaN NaN \n",
"\n",
" FAIRsharing_attributes.created-at FAIRsharing_attributes.updated-at \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.metadata.doi FAIRsharing_attributes.metadata.name \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.metadata.status \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.record-type FAIRsharing_attributes.subjects \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.domains FAIRsharing_attributes.taxonomies \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.user-defined-tags FAIRsharing_attributes.countries \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.name FAIRsharing_attributes.abbreviation \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.url FAIRsharing_attributes.doi \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.description FAIRsharing_attributes.publications \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 r3d100011189 PRISM: University of Calgary's Digital Repository \n",
"\n",
" re3data_repositoryName.language re3data_additionalName \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 eng [] \n",
"\n",
" re3data_repositoryURL re3data_repositoryIdentifier \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 https://prism.ucalgary.ca/ [OpenDOAR:7771] \n",
"\n",
" re3data_repositoryContact \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 [\"digitize@ucalgary.ca\", \"kmeranji@ucalgary.ca\"] \n",
"\n",
" re3data_description \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 PRISM is a digital archive of the University o... \n",
"\n",
" re3data_description.language re3data_type re3data_size \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 eng [institutional] {\"size\": \"\", \"updatedp\": \"\"} \n",
"\n",
" re3data_startDate re3data_endDate re3data_repositoryLanguage \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN [\"eng\"] \n",
"\n",
" re3data_subject \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 [1 Humanities and Social Sciences, 11 Humaniti... \n",
"\n",
" re3data_missionStatementURL \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" re3data_contentType re3data_providerType \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 [Audiovisual data, Images, Standard office doc... [dataProvider] \n",
"\n",
" re3data_keyword re3data_institution \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 [multidisciplinary] [{'institutionName': 'University of Calgary, L... \n",
"\n",
" re3data_policy \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 [{\"policyName\": \"Open Access Mandate\", \"policy... \n",
"\n",
" re3data_databaseAccess re3data_databaseLicense \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 {\"databaseAccessType\": \"open\", \"databaseAcces... [] \n",
"\n",
" re3data_dataAccess \\\n",
"0 NaN \n",
"1 NaN \n",
"2 NaN \n",
"3 NaN \n",
"4 [{\"dataAccessType\": \"open\", \"dataAccessRestric... \n",
"\n",
" re3data_dataLicense re3data_dataUploadType \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 [{\"dataLicenseName\": \"CC\", \"dataLicenseURL\": \"... restricted \n",
"\n",
" re3data_dataUploadLicense re3data_software \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 [{\"dataUploadLicenseName\": \"Submission Policy\"... [\"DSpace\"] \n",
"\n",
" re3data_versioning re3data_api re3data_pidSystem \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN [] [\"DOI\", \"hdl\"] \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN [] no \n",
"\n",
" re3data_qualityManagement re3data_certificate re3data_metadataStandard \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 yes [] [] \n",
"\n",
" re3data_syndication re3data_remarks \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 {\"syndication\": \"http://prism.ucalgary.ca/feed... NaN \n",
"\n",
" re3data_entryDate re3data_lastUpdate re3data_unique_id \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 2014-10-20 2020-01-09 re3data_r3d100011189 \n",
"\n",
" OpenDOAR_system_metadata.id \\\n",
"0 1064 \n",
"1 8648 \n",
"2 9713 \n",
"3 427 \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 {\"name\": \"oxford university research archive\",... \n",
"1 {\"name\": \"digital commons@georgia southern\", \"... \n",
"2 {\"name\": \"materials data repository\", \"languag... \n",
"3 {\"name\": \"digital repository at the university... \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 [{\"acronym\": \"ora\"}] \n",
"1 [] \n",
"2 [{\"acronym\": \"mdr\"}] \n",
"3 [{\"acronym\": \"drum\"}] \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.url \\\n",
"0 http://ora.ox.ac.uk \n",
"1 https://digitalcommons.georgiasouthern.edu \n",
"2 https://mdr.nims.go.jp \n",
"3 http://drum.lib.umd.edu/ \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.description \\\n",
"0 this site provides access to the collected res... \n",
"1 this site provides access to the research outp... \n",
"2 mdr : materials data repository is a data repo... \n",
"3 this site is a university repository providing... \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 institutional \n",
"1 institutional \n",
"2 institutional \n",
"3 institutional \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 [\"zh\", \"nl\", \"en\", \"fr\", \"de\", \"it\", \"ja\", \"pt... \n",
"1 [\"en\"] \n",
"2 [\"en\", \"ja\"] \n",
"3 [\"en\"] \n",
"4 NaN \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 2021-09-13 13:35:44 \n",
"1 2021-02-18 18:13:34 \n",
"2 2021-05-21 18:04:32 \n",
"3 2021-09-13 13:35:39 \n",
"4 NaN \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 2007-10-10 16:16:02 \n",
"1 2019-09-28 04:24:47 \n",
"2 2020-07-13 10:09:55 \n",
"3 2006-08-04 09:09:20 \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [\"multidisciplinary\"] \n",
"1 [\"multidisciplinary\"] \n",
"2 [\"science general\"] \n",
"3 [\"multidisciplinary\"] \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [\"journal_articles\", \"conference_and_workshop_... \n",
"1 [\"journal_articles\", \"conference_and_workshop_... \n",
"2 [\"journal_articles\", \"conference_and_workshop_... \n",
"3 [\"journal_articles\", \"theses_and_dissertations... \n",
"4 NaN \n",
"\n",
" OpenDOAR_organization \\\n",
"0 [{\"name\": \"university of oxford\", \"alternative... \n",
"1 [{\"name\": \"georgia southern university\", \"alte... \n",
"2 [{\"name\": \"national institute for materials sc... \n",
"3 [{\"name\": \"university of maryland\", \"alternati... \n",
"4 NaN \n",
"\n",
" OpenDOAR_policy_urls \\\n",
"0 [{\"policy_url\": \"https://libguides.bodleian.ox... \n",
"1 [] \n",
"2 [] \n",
"3 [{\"policy_url\": \"http://drum.lib.umd.edu/page/... \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.software \\\n",
"0 {\"name\": \"fedora\", \"version\": \"4.6.2\"} \n",
"1 {\"name\": \"digital_commons\", \"version\": \"\"} \n",
"2 {\"name\": \"fedora\", \"version\": \"\"} \n",
"3 {\"name\": \"dspace\", \"version\": \"4.1.0\"} \n",
"4 NaN \n",
"\n",
" OpenDOAR_repository_metadata.oai_url OpenDOAR_unique_id \\\n",
"0 https://ora.ox.ac.uk/oai2 OpenDOAR_1064 \n",
"1 https://digitalcommons.georgiasouthern.edu/do/oai OpenDOAR_8648 \n",
"2 https://mdr.nims.go.jp/catalog/oai OpenDOAR_9713 \n",
"3 http://drum.lib.umd.edu/oai/request OpenDOAR_427 \n",
"4 NaN NaN \n",
"\n",
" roar_eprintid roar_rev_number roar_eprint_status roar_userid roar_importid \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" roar_source roar_dir roar_datestamp roar_lastmod roar_status_changed \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" roar_type roar_succeeds roar_commentary roar_metadata_visibility \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_latitude roar_longitude roar_relation_type roar_relation_uri \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_item_issues_id roar_item_issues_type roar_item_issues_description \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_item_issues_timestamp roar_item_issues_status \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_reported_by roar_item_issues_resolved_by \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_item_issues_comment roar_item_issues_count roar_sword_depositor \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_sword_slug roar_exemplar roar_home_page roar_title roar_oai_pmh \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" roar_sword_endpoint roar_rss_feed roar_twitter_feed roar_description \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_fulltext roar_open_access roar_mandate roar_organisation_title \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_organisation_home_page roar_location_country roar_location_city \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_location_latitude roar_location_longitude roar_software roar_geoname \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_version roar_subjects roar_date roar_note roar_suggestions \\\n",
"0 NaN NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN NaN \n",
"\n",
" roar_activity_low roar_activity_medium roar_activity_high roar_recordcount \\\n",
"0 NaN NaN NaN NaN \n",
"1 NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN \n",
"4 NaN NaN NaN NaN \n",
"\n",
" roar_recordhistory roar_fulltexts_total roar_fulltexts_docs \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_fulltexts_rtotal roar_fulltexts_rdocs roar_registry_name \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_registry_id roar_submit_to roar_submitted_to_name \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_webometrics_visibility roar_webometrics_rich_files \\\n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN \n",
"\n",
" roar_webometrics_scholar roar_monthly_deposits roar_total_deposits \\\n",
"0 NaN NaN NaN \n",
"1 NaN NaN NaN \n",
"2 NaN NaN NaN \n",
"3 NaN NaN NaN \n",
"4 NaN NaN NaN \n",
"\n",
" roar_association roar_unique_id \n",
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
"3 NaN NaN \n",
"4 NaN NaN "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Enrich the cross-registry duplicates with each registry's full record.\n",
"# Every join is a left join keyed on `unique_id`, so all rows of\n",
"# `dup_across` are kept; columns of a registry whose id column has no value\n",
"# equal to the row's `unique_id` are filled with NaN.\n",
"# NOTE(review): the frame is rebound to its own merge result, so this cell\n",
"# presumably should be executed only once per kernel session -- confirm.\n",
"dup_across = (\n",
"    dup_across\n",
"    .merge(fairsharing_df, how='left', left_on='unique_id', right_on='FAIRsharing_unique_id')\n",
"    .merge(re3data_df, how='left', left_on='unique_id', right_on='re3data_unique_id')\n",
"    .merge(opendoar_df, how='left', left_on='unique_id', right_on='OpenDOAR_unique_id')\n",
"    .merge(roar_df, how='left', left_on='unique_id', right_on='roar_unique_id')\n",
")\n",
"dup_across.head()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<ipython-input-27-3881fa0a0224>:1: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n",
"<ipython-input-27-3881fa0a0224>:2: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" <th>source_set</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::000871c1fc726f0b52dc86a4eeb027de</td>\n",
" <td>[4612, 4649]</td>\n",
" <td>[4612, 4649]</td>\n",
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_4612, roar_4649]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[4612, 4649]</td>\n",
" <td>[28, 8]</td>\n",
" <td>[archive, archive]</td>\n",
" <td>[1380, 1380]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[disk0/00/00/46/12, disk0/00/00/46/49]</td>\n",
" <td>[2012-01-08 03:17:02, 2012-02-05 13:57:01]</td>\n",
" <td>[2012-04-16 10:53:04, 2012-04-16 10:39:58]</td>\n",
" <td>[2012-01-08 03:17:02, 2012-02-05 13:57:01]</td>\n",
" <td>[institutional, institutional]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[show, show]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://dspace.library.iitb.ac.in/jspui/, http...</td>\n",
" <td>[IIT Bombay Institutional Repository, IIT Bomb...</td>\n",
" <td>[http://dspace.library.iitb.ac.in/oai/request,...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://dspace.library.iitb.ac.in/xmlui/feed/a...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[TRUE, TRUE]</td>\n",
" <td>[TRUE, TRUE]</td>\n",
" <td>[TRUE, FALSE]</td>\n",
" <td>[IIT Bombay, IIT Bombay]</td>\n",
" <td>[http://www.iitb.ac.in, http://www.iitb.ac.in]</td>\n",
" <td>[in, in]</td>\n",
" <td>[Mumbai, Mumbai]</td>\n",
" <td>[19.133, 19.133]</td>\n",
" <td>[72.9166, 72.9166]</td>\n",
" <td>[dspace, dspace]</td>\n",
" <td>[geoname_2_IN, geoname_2_IN]</td>\n",
" <td>[other, other]</td>\n",
" <td>[[TA, TP, TD, TK, TJ, TN, TH], [T1, TA]]</td>\n",
" <td>[2011-12-15 09:01:35, 2012-01-05 12:09:37]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[99, nan]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[celestial, celestial]</td>\n",
" <td>[4790, 4789]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roar_4612, roar_4649]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::0163cceb20f5ca7b313419c068abd9dc</td>\n",
" <td>[7943, 8003]</td>\n",
" <td>[7943, 8003]</td>\n",
" <td>[EPrints@NIRT Library Welcomes! - EPrints@NITR...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_7943, roar_8003]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[7943, 8003]</td>\n",
" <td>[16, 19]</td>\n",
" <td>[archive, archive]</td>\n",
" <td>[4963, 5023]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[disk0/00/00/79/43, disk0/00/00/80/03]</td>\n",
" <td>[2014-03-11 11:54:06, 2014-03-30 18:13:01]</td>\n",
" <td>[2014-05-08 13:07:12, 2014-05-08 12:55:41]</td>\n",
" <td>[2014-03-11 11:54:06, 2014-03-30 18:13:01]</td>\n",
" <td>[institutional, institutional]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[show, show]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://eprints.nirt.res.in/, http://eprints.n...</td>\n",
" <td>[EPrints@NIRT Library Welcomes! - EPrints@NITR...</td>\n",
" <td>[http://eprints.nirt.res.in/cgi/oai2, http://e...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://eprints.nirt.res.in/cgi/latest_tool?ou...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[This is the Institutional Repository of the N...</td>\n",
" <td>[TRUE, FALSE]</td>\n",
" <td>[TRUE, FALSE]</td>\n",
" <td>[FALSE, FALSE]</td>\n",
" <td>[National Institute for Research in Tuberculos...</td>\n",
" <td>[http://www.nirt.res.in/, http://www.nirt.res.in]</td>\n",
" <td>[in, in]</td>\n",
" <td>[Chennai, Chennai (Madras)]</td>\n",
" <td>[nan, 13]</td>\n",
" <td>[nan, 80]</td>\n",
" <td>[eprints, eprints]</td>\n",
" <td>[geoname_2_IN, geoname_2_IN]</td>\n",
" <td>[3.3.15 eps, 3.3.15 eps]</td>\n",
" <td>[[RB, RM], [R1, RZ]]</td>\n",
" <td>[2014-03-07 15:07:45, 2014-03-19 07:05:04]</td>\n",
" <td>[The National Institute for Research in Tuberc...</td>\n",
" <td>[nan, Please include \"Tuberculosis\" as a Speci...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[[opendoar, celestial], celestial]</td>\n",
" <td>[[5410, 2725], 5430]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roar_7943, roar_8003]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::028ee724157b05d04e7bdcf237d12e60</td>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[HSF Brage Open Research Archive, HSF Brage Op...</td>\n",
" <td>[roar, roar, roar]</td>\n",
" <td>[roar_2670, roar_2698, roar_2741]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[2670, 2698, 2741]</td>\n",
" <td>[470, 317, 231]</td>\n",
" <td>[archive, archive, archive]</td>\n",
" <td>[235, 8, 8]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[disk0/00/00/26/70, disk0/00/00/26/98, disk0/0...</td>\n",
" <td>[2010-05-04 02:19:51, 2010-05-13 11:01:53, 201...</td>\n",
" <td>[2011-07-18 06:02:42, 2011-07-06 08:24:10, 201...</td>\n",
" <td>[2010-05-04 02:19:51, 2010-05-13 11:01:53, 201...</td>\n",
" <td>[institutional, institutional, institutional]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[show, show, show]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[0, 0, 0]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[http://brage.bibsys.no/hsf/, http://brage.bib...</td>\n",
" <td>[HSF Brage Open Research Archive, HSF Brage Op...</td>\n",
" <td>[http://oai.bibsys.no/oai/repository/nora_hsf_...</td>\n",
" <td>[http://brage.bibsys.no/hsf/?locale=en, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[This site provides access to the research out...</td>\n",
" <td>[TRUE, FALSE, FALSE]</td>\n",
" <td>[TRUE, FALSE, FALSE]</td>\n",
" <td>[FALSE, FALSE, FALSE]</td>\n",
" <td>[Sogn og Fjordane University College, Høgskule...</td>\n",
" <td>[http://www.hisf.no/, http://www.hisf.no/, htt...</td>\n",
" <td>[no, no, no]</td>\n",
" <td>[Sogndal, nan, nan]</td>\n",
" <td>[61.2174, 61.2174, 60.3904]</td>\n",
" <td>[7.1082, 7.1082, 5.3332]</td>\n",
" <td>[dspace, dspace, dspace]</td>\n",
" <td>[geoname_2_NO, nan, nan]</td>\n",
" <td>[other, other, other]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[2010-04-06 13:51:52, 2010-05-09 15:12:16, 201...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[0, nan, nan]</td>\n",
" <td>[0, nan, nan]</td>\n",
" <td>[0, nan, nan]</td>\n",
" <td>[50, nan, nan]</td>\n",
" <td>[0,0,1,1,1,4,4,6,6,7,8,11,12,14,15,17,18,18,18...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[[opendoar, celestial], opendoar, opendoar]</td>\n",
" <td>[[2426, 1781], 1781, 1807]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[roar_2670, roar_2698, roar_2741]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::03593ce517feac573fdaafa6dcedef61</td>\n",
" <td>[4393, 4394]</td>\n",
" <td>[4393, 4394]</td>\n",
" <td>[Institutional Repository of Kunming Institute...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_4393, roar_4394]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[4393, 4394]</td>\n",
" <td>[14, 14]</td>\n",
" <td>[archive, archive]</td>\n",
" <td>[986, 986]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[disk0/00/00/43/93, disk0/00/00/43/94]</td>\n",
" <td>[2011-11-09 23:14:52, 2011-11-09 23:14:46]</td>\n",
" <td>[2012-02-06 06:58:40, 2012-02-06 06:58:41]</td>\n",
" <td>[2011-11-09 23:14:52, 2011-11-09 23:14:46]</td>\n",
" <td>[institutional, institutional]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[show, show]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://159.226.149.42:8088/, http://159.226.1...</td>\n",
" <td>[Institutional Repository of Kunming Institute...</td>\n",
" <td>[http://159.226.149.42:8088/casirgrid-oai/requ...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[This site provides access to the output of th...</td>\n",
" <td>[TRUE, TRUE]</td>\n",
" <td>[TRUE, TRUE]</td>\n",
" <td>[FALSE, FALSE]</td>\n",
" <td>[ Kunming Institute of Zoology Chinese Academy...</td>\n",
" <td>[http://www.kiz.ac.cn/, http://www.kiz.ac.cn/]</td>\n",
" <td>[cn, cn]</td>\n",
" <td>[kunming, kunming]</td>\n",
" <td>[25.0416, 25.0416]</td>\n",
" <td>[102.755, 102.755]</td>\n",
" <td>[dspace, dspace]</td>\n",
" <td>[geoname_2_CN, geoname_2_CN]</td>\n",
" <td>[other, other]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[2010-07-22 16:00:13, 2010-07-22 16:00:13]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[0, 0]</td>\n",
" <td>[0, 0]</td>\n",
" <td>[0, 0]</td>\n",
" <td>[100, 100]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[celestial, celestial]</td>\n",
" <td>[4715, 4715]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roar_4393, roar_4394]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::03e0704b5690a2dee1861dc3ad3316c9</td>\n",
" <td>[1019, 5550]</td>\n",
" <td>[1019, 5550]</td>\n",
" <td>[PolyU Institutional Repository, PolyU Institu...</td>\n",
" <td>[roar, roar]</td>\n",
" <td>[roar_1019, roar_5550]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[1019, 5550]</td>\n",
" <td>[526, 9]</td>\n",
" <td>[archive, archive]</td>\n",
" <td>[1, 8]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[disk0/00/00/10/19, disk0/00/00/55/50]</td>\n",
" <td>[2010-01-06 13:45:03, 2012-12-12 01:25:48]</td>\n",
" <td>[2012-01-19 11:35:09, 2012-12-17 06:53:14]</td>\n",
" <td>[2010-01-06 13:45:03, 2012-12-12 01:25:48]</td>\n",
" <td>[institutional, institutional]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[show, show]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://repository.lib.polyu.edu.hk/, http://r...</td>\n",
" <td>[PolyU Institutional Repository, PolyU Institu...</td>\n",
" <td>[http://repository.lib.polyu.edu.hk/oai/reques...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, This is an Institutional repository prov...</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[The Hong Kong Polytechnic University Pao Yue-...</td>\n",
" <td>[http://www.lib.polyu.edu.hk, http://www.polyu...</td>\n",
" <td>[hk, cn]</td>\n",
" <td>[Hong Kong, nan]</td>\n",
" <td>[22.25, 22.3964]</td>\n",
" <td>[114.167, 114.109]</td>\n",
" <td>[dspace, dspace]</td>\n",
" <td>[geoname_2_HK, geoname_2_CN]</td>\n",
" <td>[other, other]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[2008-10-30 07:50:38, 2012-07-01 15:13:40]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[0, 0]</td>\n",
" <td>[0, 0]</td>\n",
" <td>[0, 0]</td>\n",
" <td>[86, 86]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,54,71,80,...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[[roarmap, opendoar, celestial], [opendoar, ce...</td>\n",
" <td>[[1441, 193, 1456], [1441, 1456]]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roar_1019, roar_5550]</td>\n",
" <td>{roar}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id duplicate_id \\\n",
"0 dedup::000871c1fc726f0b52dc86a4eeb027de [4612, 4649] \n",
"1 dedup::0163cceb20f5ca7b313419c068abd9dc [7943, 8003] \n",
"2 dedup::028ee724157b05d04e7bdcf237d12e60 [2670, 2698, 2741] \n",
"3 dedup::03593ce517feac573fdaafa6dcedef61 [4393, 4394] \n",
"4 dedup::03e0704b5690a2dee1861dc3ad3316c9 [1019, 5550] \n",
"\n",
" original_id name \\\n",
"0 [4612, 4649] [IIT Bombay Institutional Repository, IIT Bomb... \n",
"1 [7943, 8003] [EPrints@NIRT Library Welcomes! - EPrints@NITR... \n",
"2 [2670, 2698, 2741] [HSF Brage Open Research Archive, HSF Brage Op... \n",
"3 [4393, 4394] [Institutional Repository of Kunming Institute... \n",
"4 [1019, 5550] [PolyU Institutional Repository, PolyU Institu... \n",
"\n",
" source unique_id FAIRsharing_id \\\n",
"0 [roar, roar] [roar_4612, roar_4649] [nan, nan] \n",
"1 [roar, roar] [roar_7943, roar_8003] [nan, nan] \n",
"2 [roar, roar, roar] [roar_2670, roar_2698, roar_2741] [nan, nan, nan] \n",
"3 [roar, roar] [roar_4393, roar_4394] [nan, nan] \n",
"4 [roar, roar] [roar_1019, roar_5550] [nan, nan] \n",
"\n",
" FAIRsharing_type FAIRsharing_attributes.created-at \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.updated-at FAIRsharing_attributes.metadata.doi \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.name FAIRsharing_attributes.metadata.status \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.record-type FAIRsharing_attributes.subjects \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.domains FAIRsharing_attributes.taxonomies \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.user-defined-tags FAIRsharing_attributes.countries \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.name FAIRsharing_attributes.abbreviation \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.url FAIRsharing_attributes.doi \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.description FAIRsharing_attributes.publications \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryName.language re3data_additionalName \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryURL re3data_repositoryIdentifier \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryContact re3data_description re3data_description.language \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_type re3data_size re3data_startDate re3data_endDate \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryLanguage re3data_subject re3data_missionStatementURL \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_contentType re3data_providerType re3data_keyword \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_institution re3data_policy re3data_databaseAccess \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_databaseLicense re3data_dataAccess re3data_dataLicense \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_dataUploadType re3data_dataUploadLicense re3data_software \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_versioning re3data_api re3data_pidSystem \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_qualityManagement re3data_certificate re3data_metadataStandard \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_syndication re3data_remarks re3data_entryDate re3data_lastUpdate \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_unique_id OpenDOAR_system_metadata.id \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.url OpenDOAR_repository_metadata.description \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.content_types OpenDOAR_organization \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" OpenDOAR_policy_urls OpenDOAR_repository_metadata.software \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" OpenDOAR_repository_metadata.oai_url OpenDOAR_unique_id roar_eprintid \\\n",
"0 [nan, nan] [nan, nan] [4612, 4649] \n",
"1 [nan, nan] [nan, nan] [7943, 8003] \n",
"2 [nan, nan, nan] [nan, nan, nan] [2670, 2698, 2741] \n",
"3 [nan, nan] [nan, nan] [4393, 4394] \n",
"4 [nan, nan] [nan, nan] [1019, 5550] \n",
"\n",
" roar_rev_number roar_eprint_status roar_userid \\\n",
"0 [28, 8] [archive, archive] [1380, 1380] \n",
"1 [16, 19] [archive, archive] [4963, 5023] \n",
"2 [470, 317, 231] [archive, archive, archive] [235, 8, 8] \n",
"3 [14, 14] [archive, archive] [986, 986] \n",
"4 [526, 9] [archive, archive] [1, 8] \n",
"\n",
" roar_importid roar_source \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_dir \\\n",
"0 [disk0/00/00/46/12, disk0/00/00/46/49] \n",
"1 [disk0/00/00/79/43, disk0/00/00/80/03] \n",
"2 [disk0/00/00/26/70, disk0/00/00/26/98, disk0/0... \n",
"3 [disk0/00/00/43/93, disk0/00/00/43/94] \n",
"4 [disk0/00/00/10/19, disk0/00/00/55/50] \n",
"\n",
" roar_datestamp \\\n",
"0 [2012-01-08 03:17:02, 2012-02-05 13:57:01] \n",
"1 [2014-03-11 11:54:06, 2014-03-30 18:13:01] \n",
"2 [2010-05-04 02:19:51, 2010-05-13 11:01:53, 201... \n",
"3 [2011-11-09 23:14:52, 2011-11-09 23:14:46] \n",
"4 [2010-01-06 13:45:03, 2012-12-12 01:25:48] \n",
"\n",
" roar_lastmod \\\n",
"0 [2012-04-16 10:53:04, 2012-04-16 10:39:58] \n",
"1 [2014-05-08 13:07:12, 2014-05-08 12:55:41] \n",
"2 [2011-07-18 06:02:42, 2011-07-06 08:24:10, 201... \n",
"3 [2012-02-06 06:58:40, 2012-02-06 06:58:41] \n",
"4 [2012-01-19 11:35:09, 2012-12-17 06:53:14] \n",
"\n",
" roar_status_changed \\\n",
"0 [2012-01-08 03:17:02, 2012-02-05 13:57:01] \n",
"1 [2014-03-11 11:54:06, 2014-03-30 18:13:01] \n",
"2 [2010-05-04 02:19:51, 2010-05-13 11:01:53, 201... \n",
"3 [2011-11-09 23:14:52, 2011-11-09 23:14:46] \n",
"4 [2010-01-06 13:45:03, 2012-12-12 01:25:48] \n",
"\n",
" roar_type roar_succeeds \\\n",
"0 [institutional, institutional] [nan, nan] \n",
"1 [institutional, institutional] [nan, nan] \n",
"2 [institutional, institutional, institutional] [nan, nan, nan] \n",
"3 [institutional, institutional] [nan, nan] \n",
"4 [institutional, institutional] [nan, nan] \n",
"\n",
" roar_commentary roar_metadata_visibility roar_latitude roar_longitude \\\n",
"0 [nan, nan] [show, show] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [show, show] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [show, show, show] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [show, show] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [show, show] [nan, nan] [nan, nan] \n",
"\n",
" roar_relation_type roar_relation_uri roar_item_issues_id \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_type roar_item_issues_description \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_timestamp roar_item_issues_status \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_reported_by roar_item_issues_resolved_by \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_comment roar_item_issues_count roar_sword_depositor \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [0, 0, 0] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [0, nan] [nan, nan] \n",
"\n",
" roar_sword_slug roar_exemplar \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_home_page \\\n",
"0 [http://dspace.library.iitb.ac.in/jspui/, http... \n",
"1 [http://eprints.nirt.res.in/, http://eprints.n... \n",
"2 [http://brage.bibsys.no/hsf/, http://brage.bib... \n",
"3 [http://159.226.149.42:8088/, http://159.226.1... \n",
"4 [http://repository.lib.polyu.edu.hk/, http://r... \n",
"\n",
" roar_title \\\n",
"0 [IIT Bombay Institutional Repository, IIT Bomb... \n",
"1 [EPrints@NIRT Library Welcomes! - EPrints@NITR... \n",
"2 [HSF Brage Open Research Archive, HSF Brage Op... \n",
"3 [Institutional Repository of Kunming Institute... \n",
"4 [PolyU Institutional Repository, PolyU Institu... \n",
"\n",
" roar_oai_pmh \\\n",
"0 [http://dspace.library.iitb.ac.in/oai/request,... \n",
"1 [http://eprints.nirt.res.in/cgi/oai2, http://e... \n",
"2 [http://oai.bibsys.no/oai/repository/nora_hsf_... \n",
"3 [http://159.226.149.42:8088/casirgrid-oai/requ... \n",
"4 [http://repository.lib.polyu.edu.hk/oai/reques... \n",
"\n",
" roar_sword_endpoint \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [http://brage.bibsys.no/hsf/?locale=en, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"0 [http://dspace.library.iitb.ac.in/xmlui/feed/a... [nan, nan] \n",
"1 [http://eprints.nirt.res.in/cgi/latest_tool?ou... [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_description roar_fulltext \\\n",
"0 [nan, nan] [TRUE, TRUE] \n",
"1 [This is the Institutional Repository of the N... [TRUE, FALSE] \n",
"2 [This site provides access to the research out... [TRUE, FALSE, FALSE] \n",
"3 [This site provides access to the output of th... [TRUE, TRUE] \n",
"4 [nan, This is an Institutional repository prov... [TRUE, nan] \n",
"\n",
" roar_open_access roar_mandate \\\n",
"0 [TRUE, TRUE] [TRUE, FALSE] \n",
"1 [TRUE, FALSE] [FALSE, FALSE] \n",
"2 [TRUE, FALSE, FALSE] [FALSE, FALSE, FALSE] \n",
"3 [TRUE, TRUE] [FALSE, FALSE] \n",
"4 [TRUE, nan] [nan, nan] \n",
"\n",
" roar_organisation_title \\\n",
"0 [IIT Bombay, IIT Bombay] \n",
"1 [National Institute for Research in Tuberculos... \n",
"2 [Sogn og Fjordane University College, Høgskule... \n",
"3 [ Kunming Institute of Zoology Chinese Academy... \n",
"4 [The Hong Kong Polytechnic University Pao Yue-... \n",
"\n",
" roar_organisation_home_page roar_location_country \\\n",
"0 [http://www.iitb.ac.in, http://www.iitb.ac.in] [in, in] \n",
"1 [http://www.nirt.res.in/, http://www.nirt.res.in] [in, in] \n",
"2 [http://www.hisf.no/, http://www.hisf.no/, htt... [no, no, no] \n",
"3 [http://www.kiz.ac.cn/, http://www.kiz.ac.cn/] [cn, cn] \n",
"4 [http://www.lib.polyu.edu.hk, http://www.polyu... [hk, cn] \n",
"\n",
" roar_location_city roar_location_latitude \\\n",
"0 [Mumbai, Mumbai] [19.133, 19.133] \n",
"1 [Chennai, Chennai (Madras)] [nan, 13] \n",
"2 [Sogndal, nan, nan] [61.2174, 61.2174, 60.3904] \n",
"3 [kunming, kunming] [25.0416, 25.0416] \n",
"4 [Hong Kong, nan] [22.25, 22.3964] \n",
"\n",
" roar_location_longitude roar_software \\\n",
"0 [72.9166, 72.9166] [dspace, dspace] \n",
"1 [nan, 80] [eprints, eprints] \n",
"2 [7.1082, 7.1082, 5.3332] [dspace, dspace, dspace] \n",
"3 [102.755, 102.755] [dspace, dspace] \n",
"4 [114.167, 114.109] [dspace, dspace] \n",
"\n",
" roar_geoname roar_version \\\n",
"0 [geoname_2_IN, geoname_2_IN] [other, other] \n",
"1 [geoname_2_IN, geoname_2_IN] [3.3.15 eps, 3.3.15 eps] \n",
"2 [geoname_2_NO, nan, nan] [other, other, other] \n",
"3 [geoname_2_CN, geoname_2_CN] [other, other] \n",
"4 [geoname_2_HK, geoname_2_CN] [other, other] \n",
"\n",
" roar_subjects \\\n",
"0 [[TA, TP, TD, TK, TJ, TN, TH], [T1, TA]] \n",
"1 [[RB, RM], [R1, RZ]] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" roar_date \\\n",
"0 [2011-12-15 09:01:35, 2012-01-05 12:09:37] \n",
"1 [2014-03-07 15:07:45, 2014-03-19 07:05:04] \n",
"2 [2010-04-06 13:51:52, 2010-05-09 15:12:16, 201... \n",
"3 [2010-07-22 16:00:13, 2010-07-22 16:00:13] \n",
"4 [2008-10-30 07:50:38, 2012-07-01 15:13:40] \n",
"\n",
" roar_note \\\n",
"0 [nan, nan] \n",
"1 [The National Institute for Research in Tuberc... \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" roar_suggestions roar_activity_low \\\n",
"0 [nan, nan] [0, nan] \n",
"1 [nan, Please include \"Tuberculosis\" as a Speci... [nan, nan] \n",
"2 [nan, nan, nan] [0, nan, nan] \n",
"3 [nan, nan] [0, 0] \n",
"4 [nan, nan] [0, 0] \n",
"\n",
" roar_activity_medium roar_activity_high roar_recordcount \\\n",
"0 [0, nan] [0, nan] [99, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [0, nan, nan] [0, nan, nan] [50, nan, nan] \n",
"3 [0, 0] [0, 0] [100, 100] \n",
"4 [0, 0] [0, 0] [86, 86] \n",
"\n",
" roar_recordhistory roar_fulltexts_total \\\n",
"0 [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,95,... [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [0,0,1,1,1,4,4,6,6,7,8,11,12,14,15,17,18,18,18... [nan, nan, nan] \n",
"3 [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0... [nan, nan] \n",
"4 [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,54,71,80,... [nan, nan] \n",
"\n",
" roar_fulltexts_docs roar_fulltexts_rtotal roar_fulltexts_rdocs \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_registry_name \\\n",
"0 [celestial, celestial] \n",
"1 [[opendoar, celestial], celestial] \n",
"2 [[opendoar, celestial], opendoar, opendoar] \n",
"3 [celestial, celestial] \n",
"4 [[roarmap, opendoar, celestial], [opendoar, ce... \n",
"\n",
" roar_registry_id roar_submit_to roar_submitted_to_name \\\n",
"0 [4790, 4789] [nan, nan] [nan, nan] \n",
"1 [[5410, 2725], 5430] [nan, nan] [nan, nan] \n",
"2 [[2426, 1781], 1781, 1807] [nan, nan, nan] [nan, nan, nan] \n",
"3 [4715, 4715] [nan, nan] [nan, nan] \n",
"4 [[1441, 193, 1456], [1441, 1456]] [nan, nan] [nan, nan] \n",
"\n",
" roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_webometrics_visibility roar_webometrics_rich_files \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_webometrics_scholar roar_monthly_deposits roar_total_deposits \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_association roar_unique_id source_set \n",
"0 [nan, nan] [roar_4612, roar_4649] {roar} \n",
"1 [nan, nan] [roar_7943, roar_8003] {roar} \n",
"2 [nan, nan, nan] [roar_2670, roar_2698, roar_2741] {roar} \n",
"3 [nan, nan] [roar_4393, roar_4394] {roar} \n",
"4 [nan, nan] [roar_1019, roar_5550] {roar} "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collapse each dedup_id group into a single row whose other columns hold\n",
"# lists of the group's values, then add the set of distinct sources per group.\n",
"# NOTE(review): dup_within is rebound to its aggregated form here, so re-running\n",
"# this cell on its own would aggregate the already-aggregated frame.\n",
"dup_within = (\n",
"    dup_within\n",
"    .groupby('dedup_id')\n",
"    .agg(list)\n",
"    .reset_index()\n",
"    .assign(source_set=lambda grouped: grouped['source'].map(set))\n",
")\n",
"dup_within.head()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<ipython-input-28-89649d18870f>:1: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n",
"<ipython-input-28-89649d18870f>:2: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" <th>source_set</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::01b6397888c09d84f3dc89d807aa1004</td>\n",
" <td>[4745, opendoar____::a9365bd906e11324065c35be4...</td>\n",
" <td>[4745, 2429, 4320]</td>\n",
" <td>[RU-Económicas, ru-económicas, ru económicas]</td>\n",
" <td>[roar, OpenDOAR, OpenDOAR]</td>\n",
" <td>[roar_4745, OpenDOAR_2429, OpenDOAR_4320]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, 2429, 4320]</td>\n",
" <td>[nan, {\"name\": \"ru-econ\\u00f3micas\", \"language...</td>\n",
" <td>[nan, [], []]</td>\n",
" <td>[nan, http://ru.iiec.unam.mx/, http://ru.iiec....</td>\n",
" <td>[nan, this site provides access to the researc...</td>\n",
" <td>[nan, institutional, institutional]</td>\n",
" <td>[nan, [\"es\"], [\"es\"]]</td>\n",
" <td>[nan, 2021-09-13 13:35:56, 2021-09-13 13:36:17]</td>\n",
" <td>[nan, 2012-02-28 12:12:09, 2019-02-19 10:51:49]</td>\n",
" <td>[nan, [\"multidisciplinary\"], [\"business and ec...</td>\n",
" <td>[nan, [\"journal_articles\", \"theses_and_dissert...</td>\n",
" <td>[nan, [{\"name\": \"universidad nacional aut\\u00f...</td>\n",
" <td>[nan, [{\"policy_url\": \"http://ru.iiec.unam.mx/...</td>\n",
" <td>[nan, {\"name\": \"eprints\", \"version\": \"3.3.15\"}...</td>\n",
" <td>[nan, http://ru.iiec.unam.mx/cgi/oai2, nan]</td>\n",
" <td>[nan, OpenDOAR_2429, OpenDOAR_4320]</td>\n",
" <td>[4745, nan, nan]</td>\n",
" <td>[31, nan, nan]</td>\n",
" <td>[archive, nan, nan]</td>\n",
" <td>[1447, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[disk0/00/00/47/45, nan, nan]</td>\n",
" <td>[2012-02-05 14:27:15, nan, nan]</td>\n",
" <td>[2012-04-16 10:34:36, nan, nan]</td>\n",
" <td>[2012-02-05 14:27:15, nan, nan]</td>\n",
" <td>[institutional, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[show, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[http://ru.iiec.unam.mx, nan, nan]</td>\n",
" <td>[RU-Económicas, nan, nan]</td>\n",
" <td>[http://ru.iiec.unam.mx/cgi/oai2, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[http://ru.iiec.unam.mx/cgi/latest_tool?output...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[Productos académicos del Instituto de Investi...</td>\n",
" <td>[TRUE, nan, nan]</td>\n",
" <td>[TRUE, nan, nan]</td>\n",
" <td>[TRUE, nan, nan]</td>\n",
" <td>[Instituto de Investigaciones Económicas UNAM,...</td>\n",
" <td>[http://www.iiec.unam.mx/, nan, nan]</td>\n",
" <td>[mx, nan, nan]</td>\n",
" <td>[Mexico, nan, nan]</td>\n",
" <td>[19.3162, nan, nan]</td>\n",
" <td>[-99.1799, nan, nan]</td>\n",
" <td>[eprints, nan, nan]</td>\n",
" <td>[geoname_2_MX, nan, nan]</td>\n",
" <td>[3.3.15 eps, nan, nan]</td>\n",
" <td>[[G1, HG, HX, HB, HN, JA, HJ, H1, HF, T1, HD, ...</td>\n",
" <td>[2012-02-03 05:18:16, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[0, nan, nan]</td>\n",
" <td>[0, nan, nan]</td>\n",
" <td>[0, nan, nan]</td>\n",
" <td>[94, nan, nan]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,7...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[[opendoar, celestial], nan, nan]</td>\n",
" <td>[[4818, 2429], nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[roar_4745, nan, nan]</td>\n",
" <td>{roar, OpenDOAR}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::03db60c2331018b18c4166c1787072fe</td>\n",
" <td>[opendoar____::78bc62d08a9a0b9b0b9c0ad339ef82d...</td>\n",
" <td>[3087, 4500, 8504]</td>\n",
" <td>[landmark university repository, landmark univ...</td>\n",
" <td>[OpenDOAR, OpenDOAR, roar]</td>\n",
" <td>[OpenDOAR_3087, OpenDOAR_4500, roar_8504]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[3087, 4500, nan]</td>\n",
" <td>[{\"name\": \"landmark university repository\", \"l...</td>\n",
" <td>[[], [], nan]</td>\n",
" <td>[http://eprints.lmu.edu.ng/, http://eprints.lm...</td>\n",
" <td>[this site provides access to the multi-discip...</td>\n",
" <td>[institutional, institutional, nan]</td>\n",
" <td>[[\"en\"], [\"en\"], nan]</td>\n",
" <td>[2021-09-13 13:36:06, 2021-02-18 18:01:12, nan]</td>\n",
" <td>[2014-06-16 13:36:00, 2019-03-26 14:07:30, nan]</td>\n",
" <td>[[\"multidisciplinary\"], [\"multidisciplinary\"],...</td>\n",
" <td>[[\"journal_articles\"], [\"journal_articles\", \"b...</td>\n",
" <td>[[{\"name\": \"landmark university\", \"alternative...</td>\n",
" <td>[[{\"policy_url\": \"http://eprints.lmu.edu.ng/po...</td>\n",
" <td>[{\"name\": \"eprints\", \"version\": \"3.3.12\"}, {\"n...</td>\n",
" <td>[http://eprints.lmu.edu.ng/cgi/oai2, nan, nan]</td>\n",
" <td>[OpenDOAR_3087, OpenDOAR_4500, nan]</td>\n",
" <td>[nan, nan, 8504]</td>\n",
" <td>[nan, nan, 12]</td>\n",
" <td>[nan, nan, archive]</td>\n",
" <td>[nan, nan, 5459]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, disk0/00/00/85/04]</td>\n",
" <td>[nan, nan, 2014-06-24 10:14:07]</td>\n",
" <td>[nan, nan, 2014-06-28 01:38:49]</td>\n",
" <td>[nan, nan, 2014-06-24 10:14:07]</td>\n",
" <td>[nan, nan, institutional]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, show]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, http://eprints.lmu.edu.ng]</td>\n",
" <td>[nan, nan, Landmark University Repository]</td>\n",
" <td>[nan, nan, http://eprints.lmu.edu.ng/cgi/oai]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, http://eprints.lmu.edu.ng/cgi/lates...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, TRUE]</td>\n",
" <td>[nan, nan, TRUE]</td>\n",
" <td>[nan, nan, TRUE]</td>\n",
" <td>[nan, nan, Landmark University]</td>\n",
" <td>[nan, nan, http://lmu.edu.ng]</td>\n",
" <td>[nan, nan, ng]</td>\n",
" <td>[nan, nan, Omu-Aran]</td>\n",
" <td>[nan, nan, 8.12421]</td>\n",
" <td>[nan, nan, 5.09488]</td>\n",
" <td>[nan, nan, eprints]</td>\n",
" <td>[nan, nan, geoname_2_NG]</td>\n",
" <td>[nan, nan, 3.3.16 eps]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, 2014-06-07 22:16:23]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, [opendoar, celestial]]</td>\n",
" <td>[nan, nan, [5621, 3087]]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, roar_8504]</td>\n",
" <td>{OpenDOAR, roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::05128e44e27c36bdba71221bfccf735d</td>\n",
" <td>[opendoar____::426f990b332ef8193a61cc90516c124...</td>\n",
" <td>[2318, 5503, 4271]</td>\n",
" <td>[iława biblioteka cyrfrowa (iława digital libr...</td>\n",
" <td>[OpenDOAR, roar, roar]</td>\n",
" <td>[OpenDOAR_2318, roar_5503, roar_4271]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[2318, nan, nan]</td>\n",
" <td>[{\"name\": \"i\\u0142awa biblioteka cyrfrowa (i\\u...</td>\n",
" <td>[[], nan, nan]</td>\n",
" <td>[http://ibc.ilawa.pl/dlibra, nan, nan]</td>\n",
" <td>[this site provides access to digitised articl...</td>\n",
" <td>[governmental, nan, nan]</td>\n",
" <td>[[\"pl\"], nan, nan]</td>\n",
" <td>[2019-10-17 14:34:36, nan, nan]</td>\n",
" <td>[2011-10-11 13:13:58, nan, nan]</td>\n",
" <td>[[\"multidisciplinary\"], nan, nan]</td>\n",
" <td>[[\"journal_articles\"], nan, nan]</td>\n",
" <td>[[{\"name\": \"i\\u0142awa\", \"alternativeName\": \"\"...</td>\n",
" <td>[[], nan, nan]</td>\n",
" <td>[{\"name\": \"dlibra\", \"version\": \"4\"}, nan, nan]</td>\n",
" <td>[http://ibc.ilawa.pl/dlibra/oai-pmh-repository...</td>\n",
" <td>[OpenDOAR_2318, nan, nan]</td>\n",
" <td>[nan, 5503, 4271]</td>\n",
" <td>[nan, 9, 11]</td>\n",
" <td>[nan, archive, archive]</td>\n",
" <td>[nan, 8, 8]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, disk0/00/00/55/03, disk0/00/00/42/71]</td>\n",
" <td>[nan, 2012-11-19 20:33:30, 2011-10-27 01:25:14]</td>\n",
" <td>[nan, 2012-11-26 06:53:42, 2011-12-19 07:07:23]</td>\n",
" <td>[nan, 2012-11-19 20:33:30, 2011-10-27 01:25:14]</td>\n",
" <td>[nan, other, other]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, show, show]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, http://ibc.ilawa.pl/dlibra, http://ibc.i...</td>\n",
" <td>[nan, Iława Biblioteka Cyrfrowa (Iława Digital...</td>\n",
" <td>[nan, http://ibc.ilawa.pl/dlibra/oai-pmh-repos...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, This site provides access to digitised a...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, Iława, Iława]</td>\n",
" <td>[nan, http://www.ilawa.pl/_portal, http://www....</td>\n",
" <td>[nan, pl, pl]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, 53.596, 53.596]</td>\n",
" <td>[nan, 19.5684, 19.5684]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, geoname_2_PL, geoname_2_PL]</td>\n",
" <td>[nan, other, other]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, 2012-07-01 15:13:09, 2009-10-12 10:46:08]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, 0, 0]</td>\n",
" <td>[nan, 0, 0]</td>\n",
" <td>[nan, 0, 0]</td>\n",
" <td>[nan, 20, 20]</td>\n",
" <td>[nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, [opendoar, celestial], [opendoar, celest...</td>\n",
" <td>[nan, [2318, 4672], [2318, 4672]]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, roar_5503, roar_4271]</td>\n",
" <td>{OpenDOAR, roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::069059b7ef840f0c74a814ec9237b6ec</td>\n",
" <td>[5711, 126, opendoar____::1cd3882394520876dc88...</td>\n",
" <td>[5711, 126, 1509]</td>\n",
" <td>[Bibioteca Digital Ação Educativa, Biblioteca ...</td>\n",
" <td>[roar, roar, OpenDOAR]</td>\n",
" <td>[roar_5711, roar_126, OpenDOAR_1509]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, 1509]</td>\n",
" <td>[nan, nan, {\"name\": \"bibioteca digital a\\u00e7...</td>\n",
" <td>[nan, nan, []]</td>\n",
" <td>[nan, nan, http://www.bdae.org.br/dspace/]</td>\n",
" <td>[nan, nan, this site provides access to the ou...</td>\n",
" <td>[nan, nan, institutional]</td>\n",
" <td>[nan, nan, [\"pt\"]]</td>\n",
" <td>[nan, nan, 2019-10-17 14:34:23]</td>\n",
" <td>[nan, nan, 2009-05-01 10:10:47]</td>\n",
" <td>[nan, nan, [\"education\"]]</td>\n",
" <td>[nan, nan, [\"theses_and_dissertations\", \"unpub...</td>\n",
" <td>[nan, nan, [{\"name\": \"a\\u00e7\\u00e3o educativa...</td>\n",
" <td>[nan, nan, []]</td>\n",
" <td>[nan, nan, {\"name\": \"dspace\", \"version\": \"\"}]</td>\n",
" <td>[nan, nan, http://www.bdae.org.br/dspace-oai/r...</td>\n",
" <td>[nan, nan, OpenDOAR_1509]</td>\n",
" <td>[5711, 126, nan]</td>\n",
" <td>[9, 503, nan]</td>\n",
" <td>[archive, archive, nan]</td>\n",
" <td>[8, 1, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[disk0/00/00/57/11, disk0/00/00/01/26, nan]</td>\n",
" <td>[2012-12-12 04:37:14, 2010-01-06 13:43:56, nan]</td>\n",
" <td>[2012-12-17 06:53:38, 2011-07-18 05:42:07, nan]</td>\n",
" <td>[2012-12-12 04:37:14, 2010-01-06 13:43:56, nan]</td>\n",
" <td>[institutional, other, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[show, show, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, 0, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[http://www.bdae.org.br/dspace/, http://www.bd...</td>\n",
" <td>[Bibioteca Digital Ação Educativa, Biblioteca ...</td>\n",
" <td>[http://www.bdae.org.br/dspace-oai/request, ht...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[This site provides access to the output of th...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[Ação Educativa, Ação Educativa, nan]</td>\n",
" <td>[http://www.acaoeducativa.org/, http://www.aca...</td>\n",
" <td>[br, br, nan]</td>\n",
" <td>[nan, São Paulo, nan]</td>\n",
" <td>[-23.5445, -23.5445, nan]</td>\n",
" <td>[-46.6509, -46.6509, nan]</td>\n",
" <td>[dspace, dspace, nan]</td>\n",
" <td>[geoname_2_BR, geoname_2_BR, nan]</td>\n",
" <td>[other, other, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[2012-07-22 15:12:34, 2008-03-31 20:07:33, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[0, 0, nan]</td>\n",
" <td>[0, 0, nan]</td>\n",
" <td>[0, 0, nan]</td>\n",
" <td>[100, 100, nan]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,97,100,...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[[opendoar, celestial], [opendoar, celestial],...</td>\n",
" <td>[[1509, 1430], [1509, 1430], nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[roar_5711, roar_126, nan]</td>\n",
" <td>{roar, OpenDOAR}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::0e139b17a92b2df7d6c3c840e51465fe</td>\n",
" <td>[4379, 4266, opendoar____::f976b57bb9dd27aa2e7...</td>\n",
" <td>[4379, 4266, 2306]</td>\n",
" <td>[Institutional Repository of Ningbo Institute ...</td>\n",
" <td>[roar, roar, OpenDOAR]</td>\n",
" <td>[roar_4379, roar_4266, OpenDOAR_2306]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, 2306]</td>\n",
" <td>[nan, nan, {\"name\": \"institutional repository ...</td>\n",
" <td>[nan, nan, [{\"acronym\": \"nimte openir\"}]]</td>\n",
" <td>[nan, nan, http://ir.nimte.ac.cn/]</td>\n",
" <td>[nan, nan, this site provides access to the ou...</td>\n",
" <td>[nan, nan, institutional]</td>\n",
" <td>[nan, nan, [\"zh\", \"en\"]]</td>\n",
" <td>[nan, nan, 2019-10-17 14:34:36]</td>\n",
" <td>[nan, nan, 2011-10-10 13:13:11]</td>\n",
" <td>[nan, nan, [\"technology general\", \"mechanical ...</td>\n",
" <td>[nan, nan, [\"journal_articles\", \"bibliographic...</td>\n",
" <td>[nan, nan, [{\"name\": \"chinese academy of scien...</td>\n",
" <td>[nan, nan, []]</td>\n",
" <td>[nan, nan, {\"name\": \"dspace\", \"version\": \"\"}]</td>\n",
" <td>[nan, nan, http://ir.nimte.ac.cn/casirgrid-oai...</td>\n",
" <td>[nan, nan, OpenDOAR_2306]</td>\n",
" <td>[4379, 4266, nan]</td>\n",
" <td>[15, 11, nan]</td>\n",
" <td>[archive, archive, nan]</td>\n",
" <td>[986, 8, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[disk0/00/00/43/79, disk0/00/00/42/66, nan]</td>\n",
" <td>[2011-11-09 23:16:22, 2011-10-27 01:26:05, nan]</td>\n",
" <td>[2011-12-21 15:25:04, 2011-12-19 07:07:21, nan]</td>\n",
" <td>[2011-11-09 23:16:22, 2011-10-27 01:26:05, nan]</td>\n",
" <td>[institutional, institutional, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[show, show, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[http://ir.nimte.ac.cn/, http://ir.nimte.ac.cn...</td>\n",
" <td>[Institutional Repository of Ningbo Institute ...</td>\n",
" <td>[http://ir.nimte.ac.cn/casirgrid-oai/request, ...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[This site provides access to the output of th...</td>\n",
" <td>[TRUE, nan, nan]</td>\n",
" <td>[TRUE, nan, nan]</td>\n",
" <td>[FALSE, nan, nan]</td>\n",
" <td>[Ningbo Institute of Material Technology &amp; Eng...</td>\n",
" <td>[http://www.nimte.ac.cn/, http://www.cas.cn/, ...</td>\n",
" <td>[cn, cn, nan]</td>\n",
" <td>[ningbo, nan, nan]</td>\n",
" <td>[29.8807, 29.8807, nan]</td>\n",
" <td>[121.672, 121.672, nan]</td>\n",
" <td>[dspace, dspace, nan]</td>\n",
" <td>[geoname_2_CN, geoname_2_CN, nan]</td>\n",
" <td>[other, other, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[2009-12-21 02:27:07, 2009-12-21 02:27:07, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[0, 0, nan]</td>\n",
" <td>[0, 0, nan]</td>\n",
" <td>[0, 0, nan]</td>\n",
" <td>[100, 100, nan]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0...</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[celestial, [opendoar, celestial], nan]</td>\n",
" <td>[4668, [2306, 4668], nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[nan, nan, nan]</td>\n",
" <td>[roar_4379, roar_4266, nan]</td>\n",
" <td>{roar, OpenDOAR}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::01b6397888c09d84f3dc89d807aa1004 \n",
"1 dedup::03db60c2331018b18c4166c1787072fe \n",
"2 dedup::05128e44e27c36bdba71221bfccf735d \n",
"3 dedup::069059b7ef840f0c74a814ec9237b6ec \n",
"4 dedup::0e139b17a92b2df7d6c3c840e51465fe \n",
"\n",
" duplicate_id original_id \\\n",
"0 [4745, opendoar____::a9365bd906e11324065c35be4... [4745, 2429, 4320] \n",
"1 [opendoar____::78bc62d08a9a0b9b0b9c0ad339ef82d... [3087, 4500, 8504] \n",
"2 [opendoar____::426f990b332ef8193a61cc90516c124... [2318, 5503, 4271] \n",
"3 [5711, 126, opendoar____::1cd3882394520876dc88... [5711, 126, 1509] \n",
"4 [4379, 4266, opendoar____::f976b57bb9dd27aa2e7... [4379, 4266, 2306] \n",
"\n",
" name \\\n",
"0 [RU-Económicas, ru-económicas, ru económicas] \n",
"1 [landmark university repository, landmark univ... \n",
"2 [iława biblioteka cyrfrowa (iława digital libr... \n",
"3 [Bibioteca Digital Ação Educativa, Biblioteca ... \n",
"4 [Institutional Repository of Ningbo Institute ... \n",
"\n",
" source unique_id \\\n",
"0 [roar, OpenDOAR, OpenDOAR] [roar_4745, OpenDOAR_2429, OpenDOAR_4320] \n",
"1 [OpenDOAR, OpenDOAR, roar] [OpenDOAR_3087, OpenDOAR_4500, roar_8504] \n",
"2 [OpenDOAR, roar, roar] [OpenDOAR_2318, roar_5503, roar_4271] \n",
"3 [roar, roar, OpenDOAR] [roar_5711, roar_126, OpenDOAR_1509] \n",
"4 [roar, roar, OpenDOAR] [roar_4379, roar_4266, OpenDOAR_2306] \n",
"\n",
" FAIRsharing_id FAIRsharing_type FAIRsharing_attributes.created-at \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.updated-at FAIRsharing_attributes.metadata.doi \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.name FAIRsharing_attributes.metadata.status \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.record-type FAIRsharing_attributes.subjects \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.domains FAIRsharing_attributes.taxonomies \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.user-defined-tags FAIRsharing_attributes.countries \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.name FAIRsharing_attributes.abbreviation \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.url FAIRsharing_attributes.doi \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.description FAIRsharing_attributes.publications \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 [nan, nan, nan] \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_repositoryName.language re3data_additionalName \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_repositoryURL re3data_repositoryIdentifier \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_repositoryContact re3data_description re3data_description.language \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_type re3data_size re3data_startDate re3data_endDate \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_repositoryLanguage re3data_subject re3data_missionStatementURL \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_contentType re3data_providerType re3data_keyword \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_institution re3data_policy re3data_databaseAccess \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_databaseLicense re3data_dataAccess re3data_dataLicense \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_dataUploadType re3data_dataUploadLicense re3data_software \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_versioning re3data_api re3data_pidSystem \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_qualityManagement re3data_certificate re3data_metadataStandard \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_syndication re3data_remarks re3data_entryDate re3data_lastUpdate \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" re3data_unique_id OpenDOAR_system_metadata.id \\\n",
"0 [nan, nan, nan] [nan, 2429, 4320] \n",
"1 [nan, nan, nan] [3087, 4500, nan] \n",
"2 [nan, nan, nan] [2318, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, 1509] \n",
"4 [nan, nan, nan] [nan, nan, 2306] \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 [nan, {\"name\": \"ru-econ\\u00f3micas\", \"language... \n",
"1 [{\"name\": \"landmark university repository\", \"l... \n",
"2 [{\"name\": \"i\\u0142awa biblioteka cyrfrowa (i\\u... \n",
"3 [nan, nan, {\"name\": \"bibioteca digital a\\u00e7... \n",
"4 [nan, nan, {\"name\": \"institutional repository ... \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 [nan, [], []] \n",
"1 [[], [], nan] \n",
"2 [[], nan, nan] \n",
"3 [nan, nan, []] \n",
"4 [nan, nan, [{\"acronym\": \"nimte openir\"}]] \n",
"\n",
" OpenDOAR_repository_metadata.url \\\n",
"0 [nan, http://ru.iiec.unam.mx/, http://ru.iiec.... \n",
"1 [http://eprints.lmu.edu.ng/, http://eprints.lm... \n",
"2 [http://ibc.ilawa.pl/dlibra, nan, nan] \n",
"3 [nan, nan, http://www.bdae.org.br/dspace/] \n",
"4 [nan, nan, http://ir.nimte.ac.cn/] \n",
"\n",
" OpenDOAR_repository_metadata.description \\\n",
"0 [nan, this site provides access to the researc... \n",
"1 [this site provides access to the multi-discip... \n",
"2 [this site provides access to digitised articl... \n",
"3 [nan, nan, this site provides access to the ou... \n",
"4 [nan, nan, this site provides access to the ou... \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 [nan, institutional, institutional] \n",
"1 [institutional, institutional, nan] \n",
"2 [governmental, nan, nan] \n",
"3 [nan, nan, institutional] \n",
"4 [nan, nan, institutional] \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 [nan, [\"es\"], [\"es\"]] \n",
"1 [[\"en\"], [\"en\"], nan] \n",
"2 [[\"pl\"], nan, nan] \n",
"3 [nan, nan, [\"pt\"]] \n",
"4 [nan, nan, [\"zh\", \"en\"]] \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 [nan, 2021-09-13 13:35:56, 2021-09-13 13:36:17] \n",
"1 [2021-09-13 13:36:06, 2021-02-18 18:01:12, nan] \n",
"2 [2019-10-17 14:34:36, nan, nan] \n",
"3 [nan, nan, 2019-10-17 14:34:23] \n",
"4 [nan, nan, 2019-10-17 14:34:36] \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 [nan, 2012-02-28 12:12:09, 2019-02-19 10:51:49] \n",
"1 [2014-06-16 13:36:00, 2019-03-26 14:07:30, nan] \n",
"2 [2011-10-11 13:13:58, nan, nan] \n",
"3 [nan, nan, 2009-05-01 10:10:47] \n",
"4 [nan, nan, 2011-10-10 13:13:11] \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [nan, [\"multidisciplinary\"], [\"business and ec... \n",
"1 [[\"multidisciplinary\"], [\"multidisciplinary\"],... \n",
"2 [[\"multidisciplinary\"], nan, nan] \n",
"3 [nan, nan, [\"education\"]] \n",
"4 [nan, nan, [\"technology general\", \"mechanical ... \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [nan, [\"journal_articles\", \"theses_and_dissert... \n",
"1 [[\"journal_articles\"], [\"journal_articles\", \"b... \n",
"2 [[\"journal_articles\"], nan, nan] \n",
"3 [nan, nan, [\"theses_and_dissertations\", \"unpub... \n",
"4 [nan, nan, [\"journal_articles\", \"bibliographic... \n",
"\n",
" OpenDOAR_organization \\\n",
"0 [nan, [{\"name\": \"universidad nacional aut\\u00f... \n",
"1 [[{\"name\": \"landmark university\", \"alternative... \n",
"2 [[{\"name\": \"i\\u0142awa\", \"alternativeName\": \"\"... \n",
"3 [nan, nan, [{\"name\": \"a\\u00e7\\u00e3o educativa... \n",
"4 [nan, nan, [{\"name\": \"chinese academy of scien... \n",
"\n",
" OpenDOAR_policy_urls \\\n",
"0 [nan, [{\"policy_url\": \"http://ru.iiec.unam.mx/... \n",
"1 [[{\"policy_url\": \"http://eprints.lmu.edu.ng/po... \n",
"2 [[], nan, nan] \n",
"3 [nan, nan, []] \n",
"4 [nan, nan, []] \n",
"\n",
" OpenDOAR_repository_metadata.software \\\n",
"0 [nan, {\"name\": \"eprints\", \"version\": \"3.3.15\"}... \n",
"1 [{\"name\": \"eprints\", \"version\": \"3.3.12\"}, {\"n... \n",
"2 [{\"name\": \"dlibra\", \"version\": \"4\"}, nan, nan] \n",
"3 [nan, nan, {\"name\": \"dspace\", \"version\": \"\"}] \n",
"4 [nan, nan, {\"name\": \"dspace\", \"version\": \"\"}] \n",
"\n",
" OpenDOAR_repository_metadata.oai_url \\\n",
"0 [nan, http://ru.iiec.unam.mx/cgi/oai2, nan] \n",
"1 [http://eprints.lmu.edu.ng/cgi/oai2, nan, nan] \n",
"2 [http://ibc.ilawa.pl/dlibra/oai-pmh-repository... \n",
"3 [nan, nan, http://www.bdae.org.br/dspace-oai/r... \n",
"4 [nan, nan, http://ir.nimte.ac.cn/casirgrid-oai... \n",
"\n",
" OpenDOAR_unique_id roar_eprintid roar_rev_number \\\n",
"0 [nan, OpenDOAR_2429, OpenDOAR_4320] [4745, nan, nan] [31, nan, nan] \n",
"1 [OpenDOAR_3087, OpenDOAR_4500, nan] [nan, nan, 8504] [nan, nan, 12] \n",
"2 [OpenDOAR_2318, nan, nan] [nan, 5503, 4271] [nan, 9, 11] \n",
"3 [nan, nan, OpenDOAR_1509] [5711, 126, nan] [9, 503, nan] \n",
"4 [nan, nan, OpenDOAR_2306] [4379, 4266, nan] [15, 11, nan] \n",
"\n",
" roar_eprint_status roar_userid roar_importid \\\n",
"0 [archive, nan, nan] [1447, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, archive] [nan, nan, 5459] [nan, nan, nan] \n",
"2 [nan, archive, archive] [nan, 8, 8] [nan, nan, nan] \n",
"3 [archive, archive, nan] [8, 1, nan] [nan, nan, nan] \n",
"4 [archive, archive, nan] [986, 8, nan] [nan, nan, nan] \n",
"\n",
" roar_source roar_dir \\\n",
"0 [nan, nan, nan] [disk0/00/00/47/45, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, disk0/00/00/85/04] \n",
"2 [nan, nan, nan] [nan, disk0/00/00/55/03, disk0/00/00/42/71] \n",
"3 [nan, nan, nan] [disk0/00/00/57/11, disk0/00/00/01/26, nan] \n",
"4 [nan, nan, nan] [disk0/00/00/43/79, disk0/00/00/42/66, nan] \n",
"\n",
" roar_datestamp \\\n",
"0 [2012-02-05 14:27:15, nan, nan] \n",
"1 [nan, nan, 2014-06-24 10:14:07] \n",
"2 [nan, 2012-11-19 20:33:30, 2011-10-27 01:25:14] \n",
"3 [2012-12-12 04:37:14, 2010-01-06 13:43:56, nan] \n",
"4 [2011-11-09 23:16:22, 2011-10-27 01:26:05, nan] \n",
"\n",
" roar_lastmod \\\n",
"0 [2012-04-16 10:34:36, nan, nan] \n",
"1 [nan, nan, 2014-06-28 01:38:49] \n",
"2 [nan, 2012-11-26 06:53:42, 2011-12-19 07:07:23] \n",
"3 [2012-12-17 06:53:38, 2011-07-18 05:42:07, nan] \n",
"4 [2011-12-21 15:25:04, 2011-12-19 07:07:21, nan] \n",
"\n",
" roar_status_changed \\\n",
"0 [2012-02-05 14:27:15, nan, nan] \n",
"1 [nan, nan, 2014-06-24 10:14:07] \n",
"2 [nan, 2012-11-19 20:33:30, 2011-10-27 01:25:14] \n",
"3 [2012-12-12 04:37:14, 2010-01-06 13:43:56, nan] \n",
"4 [2011-11-09 23:16:22, 2011-10-27 01:26:05, nan] \n",
"\n",
" roar_type roar_succeeds roar_commentary \\\n",
"0 [institutional, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, institutional] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, other, other] [nan, nan, nan] [nan, nan, nan] \n",
"3 [institutional, other, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [institutional, institutional, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_metadata_visibility roar_latitude roar_longitude \\\n",
"0 [show, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, show] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, show, show] [nan, nan, nan] [nan, nan, nan] \n",
"3 [show, show, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [show, show, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_relation_type roar_relation_uri roar_item_issues_id \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_item_issues_type roar_item_issues_description \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_item_issues_timestamp roar_item_issues_status \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_item_issues_reported_by roar_item_issues_resolved_by \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_item_issues_comment roar_item_issues_count roar_sword_depositor \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, 0, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_sword_slug roar_exemplar \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_home_page \\\n",
"0 [http://ru.iiec.unam.mx, nan, nan] \n",
"1 [nan, nan, http://eprints.lmu.edu.ng] \n",
"2 [nan, http://ibc.ilawa.pl/dlibra, http://ibc.i... \n",
"3 [http://www.bdae.org.br/dspace/, http://www.bd... \n",
"4 [http://ir.nimte.ac.cn/, http://ir.nimte.ac.cn... \n",
"\n",
" roar_title \\\n",
"0 [RU-Económicas, nan, nan] \n",
"1 [nan, nan, Landmark University Repository] \n",
"2 [nan, Iława Biblioteka Cyrfrowa (Iława Digital... \n",
"3 [Bibioteca Digital Ação Educativa, Biblioteca ... \n",
"4 [Institutional Repository of Ningbo Institute ... \n",
"\n",
" roar_oai_pmh roar_sword_endpoint \\\n",
"0 [http://ru.iiec.unam.mx/cgi/oai2, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, http://eprints.lmu.edu.ng/cgi/oai] [nan, nan, nan] \n",
"2 [nan, http://ibc.ilawa.pl/dlibra/oai-pmh-repos... [nan, nan, nan] \n",
"3 [http://www.bdae.org.br/dspace-oai/request, ht... [nan, nan, nan] \n",
"4 [http://ir.nimte.ac.cn/casirgrid-oai/request, ... [nan, nan, nan] \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"0 [http://ru.iiec.unam.mx/cgi/latest_tool?output... [nan, nan, nan] \n",
"1 [nan, nan, http://eprints.lmu.edu.ng/cgi/lates... [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_description roar_fulltext \\\n",
"0 [Productos académicos del Instituto de Investi... [TRUE, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, TRUE] \n",
"2 [nan, This site provides access to digitised a... [nan, nan, nan] \n",
"3 [This site provides access to the output of th... [nan, nan, nan] \n",
"4 [This site provides access to the output of th... [TRUE, nan, nan] \n",
"\n",
" roar_open_access roar_mandate \\\n",
"0 [TRUE, nan, nan] [TRUE, nan, nan] \n",
"1 [nan, nan, TRUE] [nan, nan, TRUE] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [TRUE, nan, nan] [FALSE, nan, nan] \n",
"\n",
" roar_organisation_title \\\n",
"0 [Instituto de Investigaciones Económicas UNAM,... \n",
"1 [nan, nan, Landmark University] \n",
"2 [nan, Iława, Iława] \n",
"3 [Ação Educativa, Ação Educativa, nan] \n",
"4 [Ningbo Institute of Material Technology & Eng... \n",
"\n",
" roar_organisation_home_page roar_location_country \\\n",
"0 [http://www.iiec.unam.mx/, nan, nan] [mx, nan, nan] \n",
"1 [nan, nan, http://lmu.edu.ng] [nan, nan, ng] \n",
"2 [nan, http://www.ilawa.pl/_portal, http://www.... [nan, pl, pl] \n",
"3 [http://www.acaoeducativa.org/, http://www.aca... [br, br, nan] \n",
"4 [http://www.nimte.ac.cn/, http://www.cas.cn/, ... [cn, cn, nan] \n",
"\n",
" roar_location_city roar_location_latitude \\\n",
"0 [Mexico, nan, nan] [19.3162, nan, nan] \n",
"1 [nan, nan, Omu-Aran] [nan, nan, 8.12421] \n",
"2 [nan, nan, nan] [nan, 53.596, 53.596] \n",
"3 [nan, São Paulo, nan] [-23.5445, -23.5445, nan] \n",
"4 [ningbo, nan, nan] [29.8807, 29.8807, nan] \n",
"\n",
" roar_location_longitude roar_software \\\n",
"0 [-99.1799, nan, nan] [eprints, nan, nan] \n",
"1 [nan, nan, 5.09488] [nan, nan, eprints] \n",
"2 [nan, 19.5684, 19.5684] [nan, nan, nan] \n",
"3 [-46.6509, -46.6509, nan] [dspace, dspace, nan] \n",
"4 [121.672, 121.672, nan] [dspace, dspace, nan] \n",
"\n",
" roar_geoname roar_version \\\n",
"0 [geoname_2_MX, nan, nan] [3.3.15 eps, nan, nan] \n",
"1 [nan, nan, geoname_2_NG] [nan, nan, 3.3.16 eps] \n",
"2 [nan, geoname_2_PL, geoname_2_PL] [nan, other, other] \n",
"3 [geoname_2_BR, geoname_2_BR, nan] [other, other, nan] \n",
"4 [geoname_2_CN, geoname_2_CN, nan] [other, other, nan] \n",
"\n",
" roar_subjects \\\n",
"0 [[G1, HG, HX, HB, HN, JA, HJ, H1, HF, T1, HD, ... \n",
"1 [nan, nan, nan] \n",
"2 [nan, nan, nan] \n",
"3 [nan, nan, nan] \n",
"4 [nan, nan, nan] \n",
"\n",
" roar_date roar_note \\\n",
"0 [2012-02-03 05:18:16, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, 2014-06-07 22:16:23] [nan, nan, nan] \n",
"2 [nan, 2012-07-01 15:13:09, 2009-10-12 10:46:08] [nan, nan, nan] \n",
"3 [2012-07-22 15:12:34, 2008-03-31 20:07:33, nan] [nan, nan, nan] \n",
"4 [2009-12-21 02:27:07, 2009-12-21 02:27:07, nan] [nan, nan, nan] \n",
"\n",
" roar_suggestions roar_activity_low roar_activity_medium roar_activity_high \\\n",
"0 [nan, nan, nan] [0, nan, nan] [0, nan, nan] [0, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, 0, 0] [nan, 0, 0] [nan, 0, 0] \n",
"3 [nan, nan, nan] [0, 0, nan] [0, 0, nan] [0, 0, nan] \n",
"4 [nan, nan, nan] [0, 0, nan] [0, 0, nan] [0, 0, nan] \n",
"\n",
" roar_recordcount roar_recordhistory \\\n",
"0 [94, nan, nan] [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,7... \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, 20, 20] [nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,... \n",
"3 [100, 100, nan] [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,97,100,... \n",
"4 [100, 100, nan] [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0... \n",
"\n",
" roar_fulltexts_total roar_fulltexts_docs roar_fulltexts_rtotal \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_fulltexts_rdocs roar_registry_name \\\n",
"0 [nan, nan, nan] [[opendoar, celestial], nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, [opendoar, celestial]] \n",
"2 [nan, nan, nan] [nan, [opendoar, celestial], [opendoar, celest... \n",
"3 [nan, nan, nan] [[opendoar, celestial], [opendoar, celestial],... \n",
"4 [nan, nan, nan] [celestial, [opendoar, celestial], nan] \n",
"\n",
" roar_registry_id roar_submit_to roar_submitted_to_name \\\n",
"0 [[4818, 2429], nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, [5621, 3087]] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, [2318, 4672], [2318, 4672]] [nan, nan, nan] [nan, nan, nan] \n",
"3 [[1509, 1430], [1509, 1430], nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [4668, [2306, 4668], nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_submitted_to_done roar_webometrics_rank roar_webometrics_size \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_webometrics_visibility roar_webometrics_rich_files \\\n",
"0 [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_webometrics_scholar roar_monthly_deposits roar_total_deposits \\\n",
"0 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"1 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"2 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"3 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"4 [nan, nan, nan] [nan, nan, nan] [nan, nan, nan] \n",
"\n",
" roar_association roar_unique_id source_set \n",
"0 [nan, nan, nan] [roar_4745, nan, nan] {roar, OpenDOAR} \n",
"1 [nan, nan, nan] [nan, nan, roar_8504] {OpenDOAR, roar} \n",
"2 [nan, nan, nan] [nan, roar_5503, roar_4271] {OpenDOAR, roar} \n",
"3 [nan, nan, nan] [roar_5711, roar_126, nan] {roar, OpenDOAR} \n",
"4 [nan, nan, nan] [roar_4379, roar_4266, nan] {roar, OpenDOAR} "
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collapse each dedup cluster into one row: after grouping on 'dedup_id', every\n",
"# other column holds the list of that column's values across the cluster members.\n",
"# The cell rebinds `dup_hybrid` to a transformation of itself, so it is guarded on\n",
"# the derived 'source_set' column: executing it a second time would otherwise group\n",
"# the already-aggregated frame, nest the lists one level deeper and make set() fail\n",
"# on the unhashable inner lists.\n",
"if 'source_set' not in dup_hybrid.columns:\n",
"    dup_hybrid = dup_hybrid.groupby('dedup_id').aggregate(list).reset_index()\n",
"    # Distinct values of 'source' found within each cluster.\n",
"    dup_hybrid['source_set'] = dup_hybrid.source.map(set)\n",
"dup_hybrid.head()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<ipython-input-29-7abf9225ca42>:1: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n",
"<ipython-input-29-7abf9225ca42>:2: PerformanceWarning:\n",
"\n",
"DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider using pd.concat instead. To get a de-fragmented frame, use `newframe = frame.copy()`\n",
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>dedup_id</th>\n",
" <th>duplicate_id</th>\n",
" <th>original_id</th>\n",
" <th>name</th>\n",
" <th>source</th>\n",
" <th>unique_id</th>\n",
" <th>FAIRsharing_id</th>\n",
" <th>FAIRsharing_type</th>\n",
" <th>FAIRsharing_attributes.created-at</th>\n",
" <th>FAIRsharing_attributes.updated-at</th>\n",
" <th>FAIRsharing_attributes.metadata.doi</th>\n",
" <th>FAIRsharing_attributes.metadata.name</th>\n",
" <th>FAIRsharing_attributes.metadata.status</th>\n",
" <th>FAIRsharing_attributes.metadata.contacts</th>\n",
" <th>FAIRsharing_attributes.metadata.homepage</th>\n",
" <th>FAIRsharing_attributes.metadata.identifier</th>\n",
" <th>FAIRsharing_attributes.metadata.description</th>\n",
" <th>FAIRsharing_attributes.metadata.support-links</th>\n",
" <th>FAIRsharing_attributes.metadata.year-creation</th>\n",
" <th>FAIRsharing_attributes.metadata.data-processes</th>\n",
" <th>FAIRsharing_attributes.legacy-ids</th>\n",
" <th>FAIRsharing_attributes.fairsharing-registry</th>\n",
" <th>FAIRsharing_attributes.record-type</th>\n",
" <th>FAIRsharing_attributes.subjects</th>\n",
" <th>FAIRsharing_attributes.domains</th>\n",
" <th>FAIRsharing_attributes.taxonomies</th>\n",
" <th>FAIRsharing_attributes.user-defined-tags</th>\n",
" <th>FAIRsharing_attributes.countries</th>\n",
" <th>FAIRsharing_attributes.name</th>\n",
" <th>FAIRsharing_attributes.abbreviation</th>\n",
" <th>FAIRsharing_attributes.url</th>\n",
" <th>FAIRsharing_attributes.doi</th>\n",
" <th>FAIRsharing_attributes.fairsharing-licence</th>\n",
" <th>FAIRsharing_attributes.description</th>\n",
" <th>FAIRsharing_attributes.publications</th>\n",
" <th>FAIRsharing_attributes.licence-links</th>\n",
" <th>FAIRsharing_attributes.metadata.citations</th>\n",
" <th>FAIRsharing_attributes.metadata.abbreviation</th>\n",
" <th>FAIRsharing_attributes.metadata.access-points</th>\n",
" <th>FAIRsharing_attributes.metadata.associated-tools</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-date</th>\n",
" <th>FAIRsharing_attributes.metadata.deprecation-reason</th>\n",
" <th>FAIRsharing_attributes.metadata.tombstone</th>\n",
" <th>FAIRsharing_unique_id</th>\n",
" <th>re3data_orgIdentifier</th>\n",
" <th>re3data_repositoryName</th>\n",
" <th>re3data_repositoryName.language</th>\n",
" <th>re3data_additionalName</th>\n",
" <th>re3data_repositoryURL</th>\n",
" <th>re3data_repositoryIdentifier</th>\n",
" <th>re3data_repositoryContact</th>\n",
" <th>re3data_description</th>\n",
" <th>re3data_description.language</th>\n",
" <th>re3data_type</th>\n",
" <th>re3data_size</th>\n",
" <th>re3data_startDate</th>\n",
" <th>re3data_endDate</th>\n",
" <th>re3data_repositoryLanguage</th>\n",
" <th>re3data_subject</th>\n",
" <th>re3data_missionStatementURL</th>\n",
" <th>re3data_contentType</th>\n",
" <th>re3data_providerType</th>\n",
" <th>re3data_keyword</th>\n",
" <th>re3data_institution</th>\n",
" <th>re3data_policy</th>\n",
" <th>re3data_databaseAccess</th>\n",
" <th>re3data_databaseLicense</th>\n",
" <th>re3data_dataAccess</th>\n",
" <th>re3data_dataLicense</th>\n",
" <th>re3data_dataUploadType</th>\n",
" <th>re3data_dataUploadLicense</th>\n",
" <th>re3data_software</th>\n",
" <th>re3data_versioning</th>\n",
" <th>re3data_api</th>\n",
" <th>re3data_pidSystem</th>\n",
" <th>re3data_citationGuidelineURL</th>\n",
" <th>re3data_aidSystem</th>\n",
" <th>re3data_enhancedPublication</th>\n",
" <th>re3data_qualityManagement</th>\n",
" <th>re3data_certificate</th>\n",
" <th>re3data_metadataStandard</th>\n",
" <th>re3data_syndication</th>\n",
" <th>re3data_remarks</th>\n",
" <th>re3data_entryDate</th>\n",
" <th>re3data_lastUpdate</th>\n",
" <th>re3data_unique_id</th>\n",
" <th>OpenDOAR_system_metadata.id</th>\n",
" <th>OpenDOAR_repository_metadata.name</th>\n",
" <th>OpenDOAR_repository_metadata.alternativename</th>\n",
" <th>OpenDOAR_repository_metadata.url</th>\n",
" <th>OpenDOAR_repository_metadata.description</th>\n",
" <th>OpenDOAR_repository_metadata.type</th>\n",
" <th>OpenDOAR_repository_metadata.content_languages</th>\n",
" <th>OpenDOAR_system_metadata.date_modified</th>\n",
" <th>OpenDOAR_system_metadata.date_created</th>\n",
" <th>OpenDOAR_repository_metadata.content_subjects_phrases</th>\n",
" <th>OpenDOAR_repository_metadata.content_types</th>\n",
" <th>OpenDOAR_organization</th>\n",
" <th>OpenDOAR_policy_urls</th>\n",
" <th>OpenDOAR_repository_metadata.software</th>\n",
" <th>OpenDOAR_repository_metadata.oai_url</th>\n",
" <th>OpenDOAR_unique_id</th>\n",
" <th>roar_eprintid</th>\n",
" <th>roar_rev_number</th>\n",
" <th>roar_eprint_status</th>\n",
" <th>roar_userid</th>\n",
" <th>roar_importid</th>\n",
" <th>roar_source</th>\n",
" <th>roar_dir</th>\n",
" <th>roar_datestamp</th>\n",
" <th>roar_lastmod</th>\n",
" <th>roar_status_changed</th>\n",
" <th>roar_type</th>\n",
" <th>roar_succeeds</th>\n",
" <th>roar_commentary</th>\n",
" <th>roar_metadata_visibility</th>\n",
" <th>roar_latitude</th>\n",
" <th>roar_longitude</th>\n",
" <th>roar_relation_type</th>\n",
" <th>roar_relation_uri</th>\n",
" <th>roar_item_issues_id</th>\n",
" <th>roar_item_issues_type</th>\n",
" <th>roar_item_issues_description</th>\n",
" <th>roar_item_issues_timestamp</th>\n",
" <th>roar_item_issues_status</th>\n",
" <th>roar_item_issues_reported_by</th>\n",
" <th>roar_item_issues_resolved_by</th>\n",
" <th>roar_item_issues_comment</th>\n",
" <th>roar_item_issues_count</th>\n",
" <th>roar_sword_depositor</th>\n",
" <th>roar_sword_slug</th>\n",
" <th>roar_exemplar</th>\n",
" <th>roar_home_page</th>\n",
" <th>roar_title</th>\n",
" <th>roar_oai_pmh</th>\n",
" <th>roar_sword_endpoint</th>\n",
" <th>roar_rss_feed</th>\n",
" <th>roar_twitter_feed</th>\n",
" <th>roar_description</th>\n",
" <th>roar_fulltext</th>\n",
" <th>roar_open_access</th>\n",
" <th>roar_mandate</th>\n",
" <th>roar_organisation_title</th>\n",
" <th>roar_organisation_home_page</th>\n",
" <th>roar_location_country</th>\n",
" <th>roar_location_city</th>\n",
" <th>roar_location_latitude</th>\n",
" <th>roar_location_longitude</th>\n",
" <th>roar_software</th>\n",
" <th>roar_geoname</th>\n",
" <th>roar_version</th>\n",
" <th>roar_subjects</th>\n",
" <th>roar_date</th>\n",
" <th>roar_note</th>\n",
" <th>roar_suggestions</th>\n",
" <th>roar_activity_low</th>\n",
" <th>roar_activity_medium</th>\n",
" <th>roar_activity_high</th>\n",
" <th>roar_recordcount</th>\n",
" <th>roar_recordhistory</th>\n",
" <th>roar_fulltexts_total</th>\n",
" <th>roar_fulltexts_docs</th>\n",
" <th>roar_fulltexts_rtotal</th>\n",
" <th>roar_fulltexts_rdocs</th>\n",
" <th>roar_registry_name</th>\n",
" <th>roar_registry_id</th>\n",
" <th>roar_submit_to</th>\n",
" <th>roar_submitted_to_name</th>\n",
" <th>roar_submitted_to_done</th>\n",
" <th>roar_webometrics_rank</th>\n",
" <th>roar_webometrics_size</th>\n",
" <th>roar_webometrics_visibility</th>\n",
" <th>roar_webometrics_rich_files</th>\n",
" <th>roar_webometrics_scholar</th>\n",
" <th>roar_monthly_deposits</th>\n",
" <th>roar_total_deposits</th>\n",
" <th>roar_association</th>\n",
" <th>roar_unique_id</th>\n",
" <th>source_set</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>dedup::001e6d882e54c780ce269d3c46997287</td>\n",
" <td>[re3data_____::4af9fe2bb93511a5e0f0c39e94d6557...</td>\n",
" <td>[r3d100011306, 2094]</td>\n",
" <td>[RESID Database of Protein Modifications, RESI...</td>\n",
" <td>[re3data, FAIRsharing]</td>\n",
" <td>[re3data_r3d100011306, FAIRsharing_2094]</td>\n",
" <td>[nan, 2094]</td>\n",
" <td>[nan, fairsharing-records]</td>\n",
" <td>[nan, 2014-11-04T15:23:40.000Z]</td>\n",
" <td>[nan, 2021-09-30T11:38:37.114Z]</td>\n",
" <td>[nan, 10.25504/FAIRsharing.qaszjp]</td>\n",
" <td>[nan, RESID Database of Protein Modifications]</td>\n",
" <td>[nan, ready]</td>\n",
" <td>[nan, [{'contact-name': 'John S Garavelli', 'c...</td>\n",
" <td>[nan, http://pir.georgetown.edu/resid/]</td>\n",
" <td>[nan, 2094.0]</td>\n",
" <td>[nan, The RESID Database of Protein Modificati...</td>\n",
" <td>[nan, [{'url': 'http://pir.georgetown.edu/resi...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, [{'url': 'ftp://ftp.pir.georgetown.edu/p...</td>\n",
" <td>[nan, [biodbcore-000563, bsg-d000563]]</td>\n",
" <td>[nan, Database]</td>\n",
" <td>[nan, knowledgebase]</td>\n",
" <td>[nan, [Life Science]]</td>\n",
" <td>[nan, [Molecular structure, Small molecule, St...</td>\n",
" <td>[nan, [All]]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, [United Kingdom, European Union, Switzer...</td>\n",
" <td>[nan, FAIRsharing record for: RESID Database o...</td>\n",
" <td>[nan, RESID]</td>\n",
" <td>[nan, https://fairsharing.org/10.25504/FAIRsha...</td>\n",
" <td>[nan, 10.25504/FAIRsharing.qaszjp]</td>\n",
" <td>[nan, https://creativecommons.org/licenses/by-...</td>\n",
" <td>[nan, This FAIRsharing record describes: The R...</td>\n",
" <td>[nan, [{'id': 334, 'pubmed_id': 12520062, 'tit...</td>\n",
" <td>[nan, [{'licence-name': 'Open Data Commons (OD...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, RESID]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, FAIRsharing_2094]</td>\n",
" <td>[r3d100011306, nan]</td>\n",
" <td>[RESID Database of Protein Modifications, nan]</td>\n",
" <td>[eng, nan]</td>\n",
" <td>[[], nan]</td>\n",
" <td>[https://pir.georgetown.edu/resid/resid.shtml,...</td>\n",
" <td>[[FAIRsharing_doi:10.25504/FAIRsharing.qaszjp,...</td>\n",
" <td>[[\"pirmail@georgetown.edu\"], nan]</td>\n",
" <td>[The RESID Database of Protein Modifications i...</td>\n",
" <td>[eng, nan]</td>\n",
" <td>[[disciplinary], nan]</td>\n",
" <td>[{\"size\": \"\", \"updatedp\": \"\"}, nan]</td>\n",
" <td>[2014, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[[\"eng\"], nan]</td>\n",
" <td>[[2 Life Sciences, 201 Basic Biological and Me...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[[Images, Structured text], nan]</td>\n",
" <td>[[dataProvider], nan]</td>\n",
" <td>[[genomes, life sciences, proteins, proteomes,...</td>\n",
" <td>[[{'institutionName': 'Georgetown University, ...</td>\n",
" <td>[[{\"policyName\": \"Terms of Use\", \"policyURL\": ...</td>\n",
" <td>[ {\"databaseAccessType\": \"open\", \"databaseAcce...</td>\n",
" <td>[[], nan]</td>\n",
" <td>[[{\"dataAccessType\": \"open\", \"dataAccessRestri...</td>\n",
" <td>[[{\"dataLicenseName\": \"Copyrights\", \"dataLicen...</td>\n",
" <td>[closed, nan]</td>\n",
" <td>[[], nan]</td>\n",
" <td>[[\"unknown\"], nan]</td>\n",
" <td>[yes, nan]</td>\n",
" <td>[[\"ftp://ftp.pir.georgetown.edu/databases/\", \"...</td>\n",
" <td>[[\"none\"], nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[[], nan]</td>\n",
" <td>[yes, nan]</td>\n",
" <td>[unknown, nan]</td>\n",
" <td>[[], nan]</td>\n",
" <td>[[], nan]</td>\n",
" <td>[{}, nan]</td>\n",
" <td>[RESID is covered by Thomson Reuters Data Cita...</td>\n",
" <td>[2014-12-05, nan]</td>\n",
" <td>[2019-01-17, nan]</td>\n",
" <td>[re3data_r3d100011306, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>{re3data, FAIRsharing}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>dedup::0023a1e3447fdb31836536cc903f1310</td>\n",
" <td>[opendoar____::c6f798b844366ccd65d99bc7f31e0e0...</td>\n",
" <td>[3410, 10013]</td>\n",
" <td>[erucu: electronic repository of the ukrainian...</td>\n",
" <td>[OpenDOAR, roar]</td>\n",
" <td>[OpenDOAR_3410, roar_10013]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[3410, nan]</td>\n",
" <td>[{\"name\": \"erucu: electronic repository of the...</td>\n",
" <td>[[], nan]</td>\n",
" <td>[http://er.ucu.edu.ua/, nan]</td>\n",
" <td>[ukrainian catholic universitys institutional...</td>\n",
" <td>[institutional, nan]</td>\n",
" <td>[[\"uk\", \"en\"], nan]</td>\n",
" <td>[2019-10-17 14:34:57, nan]</td>\n",
" <td>[2015-07-08 12:43:38, nan]</td>\n",
" <td>[[\"multidisciplinary\"], nan]</td>\n",
" <td>[[\"journal_articles\", \"conference_and_workshop...</td>\n",
" <td>[[{\"name\": \"ukrainian catholic university\", \"a...</td>\n",
" <td>[[], nan]</td>\n",
" <td>[{\"name\": \"dspace\", \"version\": \"\"}, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[OpenDOAR_3410, nan]</td>\n",
" <td>[nan, 10013]</td>\n",
" <td>[nan, 31]</td>\n",
" <td>[nan, archive]</td>\n",
" <td>[nan, 7104]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, disk0/00/01/00/13]</td>\n",
" <td>[nan, 2015-08-08 14:53:04]</td>\n",
" <td>[nan, 2016-03-21 19:54:43]</td>\n",
" <td>[nan, 2015-08-08 14:53:04]</td>\n",
" <td>[nan, institutional]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, show]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, http://er.ucu.edu.ua/]</td>\n",
" <td>[nan, ErUCU: Electronic repository of the Ukra...</td>\n",
" <td>[nan, http://er.ucu.edu.ua/oai/request]</td>\n",
" <td>[nan, http://er.ucu.edu.ua/sword/]</td>\n",
" <td>[nan, http://er.ucu.edu.ua/feed/rss_2.0/site]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, Ukrainian Catholic Universitys institut...</td>\n",
" <td>[nan, TRUE]</td>\n",
" <td>[nan, TRUE]</td>\n",
" <td>[nan, TRUE]</td>\n",
" <td>[nan, Ukrainian Catholic University]</td>\n",
" <td>[nan, http://ucu.edu.ua/eng/]</td>\n",
" <td>[nan, ua]</td>\n",
" <td>[nan, Lviv]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, dspace]</td>\n",
" <td>[nan, geoname_2_UA]</td>\n",
" <td>[nan, other]</td>\n",
" <td>[nan, [BF, H1, BR, AC, BL, L1, D204, B1, D1, B...</td>\n",
" <td>[nan, 2015-07-07 12:38:37]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, [opendoar, celestial]]</td>\n",
" <td>[nan, [3410, 5883]]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, [russell_group, ivy_league]]</td>\n",
" <td>[nan, roar_10013]</td>\n",
" <td>{OpenDOAR, roar}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>dedup::003ab6b40af9b488decea7c582d150a2</td>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[2315, r3d100011894]</td>\n",
" <td>[Synapse, Synapse]</td>\n",
" <td>[FAIRsharing, re3data]</td>\n",
" <td>[FAIRsharing_2315, re3data_r3d100011894]</td>\n",
" <td>[2315, nan]</td>\n",
" <td>[fairsharing-records, nan]</td>\n",
" <td>[2016-08-02T13:56:30.000Z, nan]</td>\n",
" <td>[2021-09-30T11:38:43.134Z, nan]</td>\n",
" <td>[10.25504/FAIRsharing.dnxzmk, nan]</td>\n",
" <td>[Synapse, nan]</td>\n",
" <td>[ready, nan]</td>\n",
" <td>[[{'contact-name': 'Meredith Slota', 'contact-...</td>\n",
" <td>[https://www.synapse.org/, nan]</td>\n",
" <td>[2315.0, nan]</td>\n",
" <td>[Synapse is a collaborative research platform ...</td>\n",
" <td>[[{'url': 'SynapseInfo@sagebase.org', 'name': ...</td>\n",
" <td>[2010.0, nan]</td>\n",
" <td>[[{'url': 'https://www.synapse.org/', 'name': ...</td>\n",
" <td>[[biodbcore-000791, bsg-d000791], nan]</td>\n",
" <td>[Database, nan]</td>\n",
" <td>[repository, nan]</td>\n",
" <td>[[Biomedical Science, Data Management, Data In...</td>\n",
" <td>[[Experimental measurement, Protocol, Data sto...</td>\n",
" <td>[[All], nan]</td>\n",
" <td>[[], nan]</td>\n",
" <td>[[United States], nan]</td>\n",
" <td>[FAIRsharing record for: Synapse, nan]</td>\n",
" <td>[Synapse, nan]</td>\n",
" <td>[https://fairsharing.org/10.25504/FAIRsharing....</td>\n",
" <td>[10.25504/FAIRsharing.dnxzmk, nan]</td>\n",
" <td>[https://creativecommons.org/licenses/by-sa/4....</td>\n",
" <td>[This FAIRsharing record describes: Synapse is...</td>\n",
" <td>[[{'id': 2450, 'pubmed_id': 24071850, 'title':...</td>\n",
" <td>[[{'licence-name': 'Creative Commons Attributi...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[Synapse, nan]</td>\n",
" <td>[[{'url': 'http://rest-docs.synapse.org/rest/'...</td>\n",
" <td>[[{'url': 'https://sage-bionetworks.github.io/...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[FAIRsharing_2315, nan]</td>\n",
" <td>[nan, r3d100011894]</td>\n",
" <td>[nan, Synapse]</td>\n",
" <td>[nan, eng]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, https://www.synapse.org]</td>\n",
" <td>[nan, [RRID:SCR_006307, RRID:nlx_151983]]</td>\n",
" <td>[nan, [\"synapseinfo@sagebase.org\"]]</td>\n",
" <td>[nan, Synapse is an open source software platf...</td>\n",
" <td>[nan, eng]</td>\n",
" <td>[nan, [other]]</td>\n",
" <td>[nan, {\"size\": \"\", \"updatedp\": \"\"}]</td>\n",
" <td>[nan, 2012-05-22]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, [\"eng\"]]</td>\n",
" <td>[nan, [2 Life Sciences, 201 Basic Biological a...</td>\n",
" <td>[nan, https://sagebionetworks.org/tools_resour...</td>\n",
" <td>[nan, [Raw data, Scientific and statistical da...</td>\n",
" <td>[nan, [dataProvider, serviceProvider]]</td>\n",
" <td>[nan, [AMP-AD Knowledge Portal, DREAM Challeng...</td>\n",
" <td>[nan, [{'institutionName': 'Alfred P. Sloan Fo...</td>\n",
" <td>[nan, [{\"policyName\": \"Synapse Commons Governa...</td>\n",
" <td>[nan, {\"databaseAccessType\": \"open\", \"databas...</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, [{\"dataAccessType\": \"closed\", \"dataAcces...</td>\n",
" <td>[nan, [{\"dataLicenseName\": \"other\", \"dataLicen...</td>\n",
" <td>[nan, restricted]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, [\"unknown\"]]</td>\n",
" <td>[nan, yes]</td>\n",
" <td>[nan, [\"https://docs.synapse.org/rest/\", \"REST\"]]</td>\n",
" <td>[nan, [\"DOI\"]]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, yes]</td>\n",
" <td>[nan, yes]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, {}]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, 2015-12-03]</td>\n",
" <td>[nan, 2021-05-17]</td>\n",
" <td>[nan, re3data_r3d100011894]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>{FAIRsharing, re3data}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>dedup::0064f599ed0adb5870a5b3ffe438e485</td>\n",
" <td>[16034, opendoar____::d1f157379ea7e51d4a8c07af...</td>\n",
" <td>[16034, 9647]</td>\n",
" <td>[Giresun University Institutional Repository, ...</td>\n",
" <td>[roar, OpenDOAR]</td>\n",
" <td>[roar_16034, OpenDOAR_9647]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, 9647]</td>\n",
" <td>[nan, {\"name\": \"giresun university institution...</td>\n",
" <td>[nan, [{\"acronym\": \"dspace@giresun\"}, {\"name\":...</td>\n",
" <td>[nan, http://acikerisim.giresun.edu.tr]</td>\n",
" <td>[nan, this site provides access to the researc...</td>\n",
" <td>[nan, institutional]</td>\n",
" <td>[nan, [\"tr\"]]</td>\n",
" <td>[nan, 2021-05-21 18:05:06]</td>\n",
" <td>[nan, 2020-06-02 09:14:18]</td>\n",
" <td>[nan, [\"multidisciplinary\"]]</td>\n",
" <td>[nan, [\"journal_articles\"]]</td>\n",
" <td>[nan, [{\"name\": \"giresun university\", \"alterna...</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, {\"name\": \"dspace\", \"version\": \"6.2\"}]</td>\n",
" <td>[nan, http://acikerisim.giresun.edu.tr/oai/req...</td>\n",
" <td>[nan, OpenDOAR_9647]</td>\n",
" <td>[16034, nan]</td>\n",
" <td>[7, nan]</td>\n",
" <td>[archive, nan]</td>\n",
" <td>[12932, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[disk0/00/01/60/34, nan]</td>\n",
" <td>[2020-06-01 20:13:50, nan]</td>\n",
" <td>[2020-06-01 20:14:04, nan]</td>\n",
" <td>[2020-06-01 20:13:50, nan]</td>\n",
" <td>[institutional, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[show, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[https://acikerisim.giresun.edu.tr, nan]</td>\n",
" <td>[Giresun University Institutional Repository, ...</td>\n",
" <td>[https://acikerisim.giresun.edu.tr/oai, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[DSpace@Giresun is a growing collection of Gir...</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[Giresun University, nan]</td>\n",
" <td>[https://www.giresun.edu.tr/, nan]</td>\n",
" <td>[tr, nan]</td>\n",
" <td>[Giresun, nan]</td>\n",
" <td>[40.9147, nan]</td>\n",
" <td>[38.323, nan]</td>\n",
" <td>[dspace, nan]</td>\n",
" <td>[geoname_2_TR, nan]</td>\n",
" <td>[other, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[2020-05-29 18:13:17, nan]</td>\n",
" <td>[DSpace@Giresun is a growing collection of Gir...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roarmap, nan]</td>\n",
" <td>[http://roarmap.eprints.org/1046/, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roar_16034, nan]</td>\n",
" <td>{roar, OpenDOAR}</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>dedup::00ac8ed3b4327bdd4ebbebcb2ba10a00</td>\n",
" <td>[610, opendoar____::299fb2142d7de959380f91c01c...</td>\n",
" <td>[610, 1426]</td>\n",
" <td>[Hedatuz, hedatuz]</td>\n",
" <td>[roar, OpenDOAR]</td>\n",
" <td>[roar_610, OpenDOAR_1426]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, 1426]</td>\n",
" <td>[nan, {\"name\": \"hedatuz\", \"language\": \"en\"}]</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, http://hedatuz.euskomedia.org/]</td>\n",
" <td>[nan, this site contains works published by eu...</td>\n",
" <td>[nan, disciplinary]</td>\n",
" <td>[nan, [\"eu\", \"fr\", \"es\", \"en\"]]</td>\n",
" <td>[nan, 2019-10-17 14:34:21]</td>\n",
" <td>[nan, 2009-02-02 13:13:26]</td>\n",
" <td>[nan, [\"multidisciplinary\"]]</td>\n",
" <td>[nan, [\"journal_articles\", \"books_chapters_and...</td>\n",
" <td>[nan, [{\"name\": \"euskomedia\", \"alternativeName...</td>\n",
" <td>[nan, []]</td>\n",
" <td>[nan, {\"name\": \"eprints\", \"version\": \"3.0.5\"}]</td>\n",
" <td>[nan, http://hedatuz.euskomedia.org/cgi/oai2]</td>\n",
" <td>[nan, OpenDOAR_1426]</td>\n",
" <td>[610, nan]</td>\n",
" <td>[514, nan]</td>\n",
" <td>[archive, nan]</td>\n",
" <td>[1, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[disk0/00/00/06/10, nan]</td>\n",
" <td>[2010-01-06 13:44:32, nan]</td>\n",
" <td>[2011-07-18 05:48:34, nan]</td>\n",
" <td>[2010-01-06 13:44:32, nan]</td>\n",
" <td>[institutional, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[show, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://hedatuz.euskomedia.org/, nan]</td>\n",
" <td>[Hedatuz, nan]</td>\n",
" <td>[http://hedatuz.euskomedia.org/cgi/oai2, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[http://hedatuz.euskomedia.org/cgi/latest_tool...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[Hedatuz, created by the Euskomedia Fundazioa,...</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[TRUE, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[Euskomedia Fundazioa, nan]</td>\n",
" <td>[http://www.euskomedia.org, nan]</td>\n",
" <td>[org, nan]</td>\n",
" <td>[ (Unknown city), nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[eprints, nan]</td>\n",
" <td>[geoname_2_ORG, nan]</td>\n",
" <td>[eprints-3.0.5, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[2008-10-03 15:36:07, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[0, nan]</td>\n",
" <td>[100, nan]</td>\n",
" <td>[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,90,90,91,...</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[[opendoar, celestial], nan]</td>\n",
" <td>[[1294, 1426], nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[570, nan]</td>\n",
" <td>[331, nan]</td>\n",
" <td>[519, nan]</td>\n",
" <td>[145, nan]</td>\n",
" <td>[806, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[nan, nan]</td>\n",
" <td>[roar_610, nan]</td>\n",
" <td>{roar, OpenDOAR}</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" dedup_id \\\n",
"0 dedup::001e6d882e54c780ce269d3c46997287 \n",
"1 dedup::0023a1e3447fdb31836536cc903f1310 \n",
"2 dedup::003ab6b40af9b488decea7c582d150a2 \n",
"3 dedup::0064f599ed0adb5870a5b3ffe438e485 \n",
"4 dedup::00ac8ed3b4327bdd4ebbebcb2ba10a00 \n",
"\n",
" duplicate_id original_id \\\n",
"0 [re3data_____::4af9fe2bb93511a5e0f0c39e94d6557... [r3d100011306, 2094] \n",
"1 [opendoar____::c6f798b844366ccd65d99bc7f31e0e0... [3410, 10013] \n",
"2 [https://fairsharing.org/10.25504/FAIRsharing.... [2315, r3d100011894] \n",
"3 [16034, opendoar____::d1f157379ea7e51d4a8c07af... [16034, 9647] \n",
"4 [610, opendoar____::299fb2142d7de959380f91c01c... [610, 1426] \n",
"\n",
" name source \\\n",
"0 [RESID Database of Protein Modifications, RESI... [re3data, FAIRsharing] \n",
"1 [erucu: electronic repository of the ukrainian... [OpenDOAR, roar] \n",
"2 [Synapse, Synapse] [FAIRsharing, re3data] \n",
"3 [Giresun University Institutional Repository, ... [roar, OpenDOAR] \n",
"4 [Hedatuz, hedatuz] [roar, OpenDOAR] \n",
"\n",
" unique_id FAIRsharing_id \\\n",
"0 [re3data_r3d100011306, FAIRsharing_2094] [nan, 2094] \n",
"1 [OpenDOAR_3410, roar_10013] [nan, nan] \n",
"2 [FAIRsharing_2315, re3data_r3d100011894] [2315, nan] \n",
"3 [roar_16034, OpenDOAR_9647] [nan, nan] \n",
"4 [roar_610, OpenDOAR_1426] [nan, nan] \n",
"\n",
" FAIRsharing_type FAIRsharing_attributes.created-at \\\n",
"0 [nan, fairsharing-records] [nan, 2014-11-04T15:23:40.000Z] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [fairsharing-records, nan] [2016-08-02T13:56:30.000Z, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.updated-at FAIRsharing_attributes.metadata.doi \\\n",
"0 [nan, 2021-09-30T11:38:37.114Z] [nan, 10.25504/FAIRsharing.qaszjp] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [2021-09-30T11:38:43.134Z, nan] [10.25504/FAIRsharing.dnxzmk, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.name \\\n",
"0 [nan, RESID Database of Protein Modifications] \n",
"1 [nan, nan] \n",
"2 [Synapse, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.status \\\n",
"0 [nan, ready] \n",
"1 [nan, nan] \n",
"2 [ready, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.contacts \\\n",
"0 [nan, [{'contact-name': 'John S Garavelli', 'c... \n",
"1 [nan, nan] \n",
"2 [[{'contact-name': 'Meredith Slota', 'contact-... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.homepage \\\n",
"0 [nan, http://pir.georgetown.edu/resid/] \n",
"1 [nan, nan] \n",
"2 [https://www.synapse.org/, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.identifier \\\n",
"0 [nan, 2094.0] \n",
"1 [nan, nan] \n",
"2 [2315.0, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.description \\\n",
"0 [nan, The RESID Database of Protein Modificati... \n",
"1 [nan, nan] \n",
"2 [Synapse is a collaborative research platform ... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.support-links \\\n",
"0 [nan, [{'url': 'http://pir.georgetown.edu/resi... \n",
"1 [nan, nan] \n",
"2 [[{'url': 'SynapseInfo@sagebase.org', 'name': ... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.year-creation \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [2010.0, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.data-processes \\\n",
"0 [nan, [{'url': 'ftp://ftp.pir.georgetown.edu/p... \n",
"1 [nan, nan] \n",
"2 [[{'url': 'https://www.synapse.org/', 'name': ... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.legacy-ids \\\n",
"0 [nan, [biodbcore-000563, bsg-d000563]] \n",
"1 [nan, nan] \n",
"2 [[biodbcore-000791, bsg-d000791], nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.fairsharing-registry \\\n",
"0 [nan, Database] \n",
"1 [nan, nan] \n",
"2 [Database, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.record-type \\\n",
"0 [nan, knowledgebase] \n",
"1 [nan, nan] \n",
"2 [repository, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.subjects \\\n",
"0 [nan, [Life Science]] \n",
"1 [nan, nan] \n",
"2 [[Biomedical Science, Data Management, Data In... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.domains \\\n",
"0 [nan, [Molecular structure, Small molecule, St... \n",
"1 [nan, nan] \n",
"2 [[Experimental measurement, Protocol, Data sto... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.taxonomies FAIRsharing_attributes.user-defined-tags \\\n",
"0 [nan, [All]] [nan, []] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [[All], nan] [[], nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" FAIRsharing_attributes.countries \\\n",
"0 [nan, [United Kingdom, European Union, Switzer... \n",
"1 [nan, nan] \n",
"2 [[United States], nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.name \\\n",
"0 [nan, FAIRsharing record for: RESID Database o... \n",
"1 [nan, nan] \n",
"2 [FAIRsharing record for: Synapse, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.abbreviation \\\n",
"0 [nan, RESID] \n",
"1 [nan, nan] \n",
"2 [Synapse, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.url \\\n",
"0 [nan, https://fairsharing.org/10.25504/FAIRsha... \n",
"1 [nan, nan] \n",
"2 [https://fairsharing.org/10.25504/FAIRsharing.... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.doi \\\n",
"0 [nan, 10.25504/FAIRsharing.qaszjp] \n",
"1 [nan, nan] \n",
"2 [10.25504/FAIRsharing.dnxzmk, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.fairsharing-licence \\\n",
"0 [nan, https://creativecommons.org/licenses/by-... \n",
"1 [nan, nan] \n",
"2 [https://creativecommons.org/licenses/by-sa/4.... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.description \\\n",
"0 [nan, This FAIRsharing record describes: The R... \n",
"1 [nan, nan] \n",
"2 [This FAIRsharing record describes: Synapse is... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.publications \\\n",
"0 [nan, [{'id': 334, 'pubmed_id': 12520062, 'tit... \n",
"1 [nan, nan] \n",
"2 [[{'id': 2450, 'pubmed_id': 24071850, 'title':... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.licence-links \\\n",
"0 [nan, [{'licence-name': 'Open Data Commons (OD... \n",
"1 [nan, nan] \n",
"2 [[{'licence-name': 'Creative Commons Attributi... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.citations \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.abbreviation \\\n",
"0 [nan, RESID] \n",
"1 [nan, nan] \n",
"2 [Synapse, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.access-points \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [[{'url': 'http://rest-docs.synapse.org/rest/'... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.associated-tools \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [[{'url': 'https://sage-bionetworks.github.io/... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-date \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.deprecation-reason \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" FAIRsharing_attributes.metadata.tombstone FAIRsharing_unique_id \\\n",
"0 [nan, nan] [nan, FAIRsharing_2094] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan] [FAIRsharing_2315, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_orgIdentifier re3data_repositoryName \\\n",
"0 [r3d100011306, nan] [RESID Database of Protein Modifications, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, r3d100011894] [nan, Synapse] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryName.language re3data_additionalName \\\n",
"0 [eng, nan] [[], nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, eng] [nan, []] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryURL \\\n",
"0 [https://pir.georgetown.edu/resid/resid.shtml,... \n",
"1 [nan, nan] \n",
"2 [nan, https://www.synapse.org] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_repositoryIdentifier \\\n",
"0 [[FAIRsharing_doi:10.25504/FAIRsharing.qaszjp,... \n",
"1 [nan, nan] \n",
"2 [nan, [RRID:SCR_006307, RRID:nlx_151983]] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_repositoryContact \\\n",
"0 [[\"pirmail@georgetown.edu\"], nan] \n",
"1 [nan, nan] \n",
"2 [nan, [\"synapseinfo@sagebase.org\"]] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_description \\\n",
"0 [The RESID Database of Protein Modifications i... \n",
"1 [nan, nan] \n",
"2 [nan, Synapse is an open source software platf... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_description.language re3data_type \\\n",
"0 [eng, nan] [[disciplinary], nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, eng] [nan, [other]] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_size re3data_startDate re3data_endDate \\\n",
"0 [{\"size\": \"\", \"updatedp\": \"\"}, nan] [2014, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, {\"size\": \"\", \"updatedp\": \"\"}] [nan, 2012-05-22] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_repositoryLanguage \\\n",
"0 [[\"eng\"], nan] \n",
"1 [nan, nan] \n",
"2 [nan, [\"eng\"]] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_subject \\\n",
"0 [[2 Life Sciences, 201 Basic Biological and Me... \n",
"1 [nan, nan] \n",
"2 [nan, [2 Life Sciences, 201 Basic Biological a... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_missionStatementURL \\\n",
"0 [nan, nan] \n",
"1 [nan, nan] \n",
"2 [nan, https://sagebionetworks.org/tools_resour... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_contentType \\\n",
"0 [[Images, Structured text], nan] \n",
"1 [nan, nan] \n",
"2 [nan, [Raw data, Scientific and statistical da... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_providerType \\\n",
"0 [[dataProvider], nan] \n",
"1 [nan, nan] \n",
"2 [nan, [dataProvider, serviceProvider]] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_keyword \\\n",
"0 [[genomes, life sciences, proteins, proteomes,... \n",
"1 [nan, nan] \n",
"2 [nan, [AMP-AD Knowledge Portal, DREAM Challeng... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_institution \\\n",
"0 [[{'institutionName': 'Georgetown University, ... \n",
"1 [nan, nan] \n",
"2 [nan, [{'institutionName': 'Alfred P. Sloan Fo... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_policy \\\n",
"0 [[{\"policyName\": \"Terms of Use\", \"policyURL\": ... \n",
"1 [nan, nan] \n",
"2 [nan, [{\"policyName\": \"Synapse Commons Governa... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_databaseAccess re3data_databaseLicense \\\n",
"0 [ {\"databaseAccessType\": \"open\", \"databaseAcce... [[], nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, {\"databaseAccessType\": \"open\", \"databas... [nan, []] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_dataAccess \\\n",
"0 [[{\"dataAccessType\": \"open\", \"dataAccessRestri... \n",
"1 [nan, nan] \n",
"2 [nan, [{\"dataAccessType\": \"closed\", \"dataAcces... \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" re3data_dataLicense re3data_dataUploadType \\\n",
"0 [[{\"dataLicenseName\": \"Copyrights\", \"dataLicen... [closed, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, [{\"dataLicenseName\": \"other\", \"dataLicen... [nan, restricted] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_dataUploadLicense re3data_software re3data_versioning \\\n",
"0 [[], nan] [[\"unknown\"], nan] [yes, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, []] [nan, [\"unknown\"]] [nan, yes] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_api re3data_pidSystem \\\n",
"0 [[\"ftp://ftp.pir.georgetown.edu/databases/\", \"... [[\"none\"], nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, [\"https://docs.synapse.org/rest/\", \"REST\"]] [nan, [\"DOI\"]] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_citationGuidelineURL re3data_aidSystem re3data_enhancedPublication \\\n",
"0 [nan, nan] [[], nan] [yes, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, []] [nan, yes] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_qualityManagement re3data_certificate re3data_metadataStandard \\\n",
"0 [unknown, nan] [[], nan] [[], nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, yes] [nan, []] [nan, []] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" re3data_syndication re3data_remarks \\\n",
"0 [{}, nan] [RESID is covered by Thomson Reuters Data Cita... \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, {}] [nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" re3data_entryDate re3data_lastUpdate re3data_unique_id \\\n",
"0 [2014-12-05, nan] [2019-01-17, nan] [re3data_r3d100011306, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, 2015-12-03] [nan, 2021-05-17] [nan, re3data_r3d100011894] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" OpenDOAR_system_metadata.id \\\n",
"0 [nan, nan] \n",
"1 [3410, nan] \n",
"2 [nan, nan] \n",
"3 [nan, 9647] \n",
"4 [nan, 1426] \n",
"\n",
" OpenDOAR_repository_metadata.name \\\n",
"0 [nan, nan] \n",
"1 [{\"name\": \"erucu: electronic repository of the... \n",
"2 [nan, nan] \n",
"3 [nan, {\"name\": \"giresun university institution... \n",
"4 [nan, {\"name\": \"hedatuz\", \"language\": \"en\"}] \n",
"\n",
" OpenDOAR_repository_metadata.alternativename \\\n",
"0 [nan, nan] \n",
"1 [[], nan] \n",
"2 [nan, nan] \n",
"3 [nan, [{\"acronym\": \"dspace@giresun\"}, {\"name\":... \n",
"4 [nan, []] \n",
"\n",
" OpenDOAR_repository_metadata.url \\\n",
"0 [nan, nan] \n",
"1 [http://er.ucu.edu.ua/, nan] \n",
"2 [nan, nan] \n",
"3 [nan, http://acikerisim.giresun.edu.tr] \n",
"4 [nan, http://hedatuz.euskomedia.org/] \n",
"\n",
" OpenDOAR_repository_metadata.description \\\n",
"0 [nan, nan] \n",
"1 [ukrainian catholic universitys institutional... \n",
"2 [nan, nan] \n",
"3 [nan, this site provides access to the researc... \n",
"4 [nan, this site contains works published by eu... \n",
"\n",
" OpenDOAR_repository_metadata.type \\\n",
"0 [nan, nan] \n",
"1 [institutional, nan] \n",
"2 [nan, nan] \n",
"3 [nan, institutional] \n",
"4 [nan, disciplinary] \n",
"\n",
" OpenDOAR_repository_metadata.content_languages \\\n",
"0 [nan, nan] \n",
"1 [[\"uk\", \"en\"], nan] \n",
"2 [nan, nan] \n",
"3 [nan, [\"tr\"]] \n",
"4 [nan, [\"eu\", \"fr\", \"es\", \"en\"]] \n",
"\n",
" OpenDOAR_system_metadata.date_modified \\\n",
"0 [nan, nan] \n",
"1 [2019-10-17 14:34:57, nan] \n",
"2 [nan, nan] \n",
"3 [nan, 2021-05-21 18:05:06] \n",
"4 [nan, 2019-10-17 14:34:21] \n",
"\n",
" OpenDOAR_system_metadata.date_created \\\n",
"0 [nan, nan] \n",
"1 [2015-07-08 12:43:38, nan] \n",
"2 [nan, nan] \n",
"3 [nan, 2020-06-02 09:14:18] \n",
"4 [nan, 2009-02-02 13:13:26] \n",
"\n",
" OpenDOAR_repository_metadata.content_subjects_phrases \\\n",
"0 [nan, nan] \n",
"1 [[\"multidisciplinary\"], nan] \n",
"2 [nan, nan] \n",
"3 [nan, [\"multidisciplinary\"]] \n",
"4 [nan, [\"multidisciplinary\"]] \n",
"\n",
" OpenDOAR_repository_metadata.content_types \\\n",
"0 [nan, nan] \n",
"1 [[\"journal_articles\", \"conference_and_workshop... \n",
"2 [nan, nan] \n",
"3 [nan, [\"journal_articles\"]] \n",
"4 [nan, [\"journal_articles\", \"books_chapters_and... \n",
"\n",
" OpenDOAR_organization OpenDOAR_policy_urls \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [[{\"name\": \"ukrainian catholic university\", \"a... [[], nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, [{\"name\": \"giresun university\", \"alterna... [nan, []] \n",
"4 [nan, [{\"name\": \"euskomedia\", \"alternativeName... [nan, []] \n",
"\n",
" OpenDOAR_repository_metadata.software \\\n",
"0 [nan, nan] \n",
"1 [{\"name\": \"dspace\", \"version\": \"\"}, nan] \n",
"2 [nan, nan] \n",
"3 [nan, {\"name\": \"dspace\", \"version\": \"6.2\"}] \n",
"4 [nan, {\"name\": \"eprints\", \"version\": \"3.0.5\"}] \n",
"\n",
" OpenDOAR_repository_metadata.oai_url OpenDOAR_unique_id \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [OpenDOAR_3410, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, http://acikerisim.giresun.edu.tr/oai/req... [nan, OpenDOAR_9647] \n",
"4 [nan, http://hedatuz.euskomedia.org/cgi/oai2] [nan, OpenDOAR_1426] \n",
"\n",
" roar_eprintid roar_rev_number roar_eprint_status roar_userid \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, 10013] [nan, 31] [nan, archive] [nan, 7104] \n",
"2 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"3 [16034, nan] [7, nan] [archive, nan] [12932, nan] \n",
"4 [610, nan] [514, nan] [archive, nan] [1, nan] \n",
"\n",
" roar_importid roar_source roar_dir \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, disk0/00/01/00/13] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [disk0/00/01/60/34, nan] \n",
"4 [nan, nan] [nan, nan] [disk0/00/00/06/10, nan] \n",
"\n",
" roar_datestamp roar_lastmod \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, 2015-08-08 14:53:04] [nan, 2016-03-21 19:54:43] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [2020-06-01 20:13:50, nan] [2020-06-01 20:14:04, nan] \n",
"4 [2010-01-06 13:44:32, nan] [2011-07-18 05:48:34, nan] \n",
"\n",
" roar_status_changed roar_type roar_succeeds \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, 2015-08-08 14:53:04] [nan, institutional] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [2020-06-01 20:13:50, nan] [institutional, nan] [nan, nan] \n",
"4 [2010-01-06 13:44:32, nan] [institutional, nan] [nan, nan] \n",
"\n",
" roar_commentary roar_metadata_visibility roar_latitude roar_longitude \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, show] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [show, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [show, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_relation_type roar_relation_uri roar_item_issues_id \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_type roar_item_issues_description \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_timestamp roar_item_issues_status \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_reported_by roar_item_issues_resolved_by \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_item_issues_comment roar_item_issues_count roar_sword_depositor \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [0, nan] [nan, nan] \n",
"\n",
" roar_sword_slug roar_exemplar roar_home_page \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, http://er.ucu.edu.ua/] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [https://acikerisim.giresun.edu.tr, nan] \n",
"4 [nan, nan] [nan, nan] [http://hedatuz.euskomedia.org/, nan] \n",
"\n",
" roar_title \\\n",
"0 [nan, nan] \n",
"1 [nan, ErUCU: Electronic repository of the Ukra... \n",
"2 [nan, nan] \n",
"3 [Giresun University Institutional Repository, ... \n",
"4 [Hedatuz, nan] \n",
"\n",
" roar_oai_pmh \\\n",
"0 [nan, nan] \n",
"1 [nan, http://er.ucu.edu.ua/oai/request] \n",
"2 [nan, nan] \n",
"3 [https://acikerisim.giresun.edu.tr/oai, nan] \n",
"4 [http://hedatuz.euskomedia.org/cgi/oai2, nan] \n",
"\n",
" roar_sword_endpoint \\\n",
"0 [nan, nan] \n",
"1 [nan, http://er.ucu.edu.ua/sword/] \n",
"2 [nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" roar_rss_feed roar_twitter_feed \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, http://er.ucu.edu.ua/feed/rss_2.0/site] [nan, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [http://hedatuz.euskomedia.org/cgi/latest_tool... [nan, nan] \n",
"\n",
" roar_description roar_fulltext \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, Ukrainian Catholic Universitys institut... [nan, TRUE] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [DSpace@Giresun is a growing collection of Gir... [TRUE, nan] \n",
"4 [Hedatuz, created by the Euskomedia Fundazioa,... [TRUE, nan] \n",
"\n",
" roar_open_access roar_mandate roar_organisation_title \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, TRUE] [nan, TRUE] [nan, Ukrainian Catholic University] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [TRUE, nan] [TRUE, nan] [Giresun University, nan] \n",
"4 [TRUE, nan] [nan, nan] [Euskomedia Fundazioa, nan] \n",
"\n",
" roar_organisation_home_page roar_location_country \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, http://ucu.edu.ua/eng/] [nan, ua] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [https://www.giresun.edu.tr/, nan] [tr, nan] \n",
"4 [http://www.euskomedia.org, nan] [org, nan] \n",
"\n",
" roar_location_city roar_location_latitude roar_location_longitude \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, Lviv] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [Giresun, nan] [40.9147, nan] [38.323, nan] \n",
"4 [ (Unknown city), nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_software roar_geoname roar_version \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, dspace] [nan, geoname_2_UA] [nan, other] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [dspace, nan] [geoname_2_TR, nan] [other, nan] \n",
"4 [eprints, nan] [geoname_2_ORG, nan] [eprints-3.0.5, nan] \n",
"\n",
" roar_subjects \\\n",
"0 [nan, nan] \n",
"1 [nan, [BF, H1, BR, AC, BL, L1, D204, B1, D1, B... \n",
"2 [nan, nan] \n",
"3 [nan, nan] \n",
"4 [nan, nan] \n",
"\n",
" roar_date \\\n",
"0 [nan, nan] \n",
"1 [nan, 2015-07-07 12:38:37] \n",
"2 [nan, nan] \n",
"3 [2020-05-29 18:13:17, nan] \n",
"4 [2008-10-03 15:36:07, nan] \n",
"\n",
" roar_note roar_suggestions \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [DSpace@Giresun is a growing collection of Gir... [nan, nan] \n",
"4 [nan, nan] [nan, nan] \n",
"\n",
" roar_activity_low roar_activity_medium roar_activity_high roar_recordcount \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] [nan, nan] \n",
"4 [0, nan] [0, nan] [0, nan] [100, nan] \n",
"\n",
" roar_recordhistory roar_fulltexts_total \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] \n",
"4 [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,90,90,91,... [nan, nan] \n",
"\n",
" roar_fulltexts_docs roar_fulltexts_rtotal roar_fulltexts_rdocs \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_registry_name roar_registry_id \\\n",
"0 [nan, nan] [nan, nan] \n",
"1 [nan, [opendoar, celestial]] [nan, [3410, 5883]] \n",
"2 [nan, nan] [nan, nan] \n",
"3 [roarmap, nan] [http://roarmap.eprints.org/1046/, nan] \n",
"4 [[opendoar, celestial], nan] [[1294, 1426], nan] \n",
"\n",
" roar_submit_to roar_submitted_to_name roar_submitted_to_done \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [nan, nan] [nan, nan] [nan, nan] \n",
"\n",
" roar_webometrics_rank roar_webometrics_size roar_webometrics_visibility \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [570, nan] [331, nan] [519, nan] \n",
"\n",
" roar_webometrics_rich_files roar_webometrics_scholar roar_monthly_deposits \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, nan] [nan, nan] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [nan, nan] \n",
"4 [145, nan] [806, nan] [nan, nan] \n",
"\n",
" roar_total_deposits roar_association roar_unique_id \\\n",
"0 [nan, nan] [nan, nan] [nan, nan] \n",
"1 [nan, nan] [nan, [russell_group, ivy_league]] [nan, roar_10013] \n",
"2 [nan, nan] [nan, nan] [nan, nan] \n",
"3 [nan, nan] [nan, nan] [roar_16034, nan] \n",
"4 [nan, nan] [nan, nan] [roar_610, nan] \n",
"\n",
" source_set \n",
"0 {re3data, FAIRsharing} \n",
"1 {OpenDOAR, roar} \n",
"2 {FAIRsharing, re3data} \n",
"3 {roar, OpenDOAR} \n",
"4 {roar, OpenDOAR} "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dup_across = dup_across.groupby('dedup_id').aggregate(list).reset_index()\n",
"dup_across['source_set'] = dup_across.source.map(set)\n",
"dup_across.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"def remove_nan(list_obj):\n",
" if isinstance(list_obj, list):\n",
" while np.nan in list_obj:\n",
" list_obj.remove(np.nan)\n",
" return list_obj\n",
" \n",
"dup_within.applymap(remove_nan).to_csv('../data/processed/dup_within.csv')\n",
"dup_hybrid.applymap(remove_nan).to_csv('../data/processed/dup_hybrid.csv')\n",
"dup_across.applymap(remove_nan).to_csv('../data/processed/dup_across.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}