2021-07-22 11:35:40 +02:00
|
|
|
{
|
|
|
|
"cells": [
|
2021-07-23 12:38:56 +02:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"Information to check for each repository record:\n",
|
|
|
|
"- names\n",
|
|
|
|
"- description\n",
|
|
|
|
"- url\n",
|
|
|
|
"- subjects & keywords\n",
|
|
|
|
"- content type\n",
|
|
|
|
"- repo type\n",
|
|
|
|
"- policies\n",
|
|
|
|
"\n"
|
|
|
|
]
|
|
|
|
},
|
2021-07-22 11:35:40 +02:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import ast\n",
|
|
|
|
"import csv\n",
|
|
|
|
"import json\n",
|
|
|
|
"\n",
|
|
|
|
"import numpy as np\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"\n",
|
|
|
|
"import plotly\n",
|
|
|
|
"from plotly.offline import iplot, init_notebook_mode\n",
|
|
|
|
"import plotly.graph_objs as go\n",
|
|
|
|
"import plotly.express as px\n",
|
|
|
|
"\n",
|
|
|
|
"pd.set_option('display.max_columns', None)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"## Loading dataset"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 2,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>openaire_id</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>re3data_id</th>\n",
|
|
|
|
" <th>repository_name</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>additional_name</th>\n",
|
|
|
|
" <th>repository_url</th>\n",
|
|
|
|
" <th>repository_id</th>\n",
|
|
|
|
" <th>description</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>type</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>size</th>\n",
|
|
|
|
" <th>update_date</th>\n",
|
|
|
|
" <th>start_date</th>\n",
|
|
|
|
" <th>end_date</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>subject</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>mission_statement</th>\n",
|
|
|
|
" <th>content_type</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>provider_type</th>\n",
|
|
|
|
" <th>keyword</th>\n",
|
|
|
|
" <th>institution</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>policy</th>\n",
|
|
|
|
" <th>database_access</th>\n",
|
|
|
|
" <th>database_license</th>\n",
|
|
|
|
" <th>data_access</th>\n",
|
|
|
|
" <th>data_license</th>\n",
|
|
|
|
" <th>data_upload</th>\n",
|
|
|
|
" <th>data_upload_license</th>\n",
|
|
|
|
" <th>software</th>\n",
|
|
|
|
" <th>versioning</th>\n",
|
|
|
|
" <th>api</th>\n",
|
|
|
|
" <th>pid_system</th>\n",
|
|
|
|
" <th>citation_guideline_url</th>\n",
|
|
|
|
" <th>aid_system</th>\n",
|
|
|
|
" <th>enhanced_publication</th>\n",
|
|
|
|
" <th>quality_management</th>\n",
|
|
|
|
" <th>certificate</th>\n",
|
|
|
|
" <th>metadata_standard</th>\n",
|
|
|
|
" <th>syndication</th>\n",
|
|
|
|
" <th>remarks</th>\n",
|
|
|
|
" <th>entry_date</th>\n",
|
|
|
|
" <th>last_update</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>0</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>re3data_____::91780fe96da5ba32f804e43359c154ba</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>r3d100000001</td>\n",
|
|
|
|
" <td>Odum Institute Archive Dataverse</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>https://dataverse.unc.edu/dataverse/odum</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>The Odum Institute Archive Dataverse contains ...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[disciplinary]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>13 dataverses; 3.050 datasets</td>\n",
|
|
|
|
" <td>2020-12-04</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[1 Humanities and Social Sciences, 111 Social ...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>[Databases, Plain text, Scientific and statist...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[dataProvider]</td>\n",
|
|
|
|
" <td>[FAIR, Middle East, crime, demography, economy...</td>\n",
|
|
|
|
" <td>[[Odum Institute for Research in Social Scienc...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>Odum Dataverse is covered by Thomson Reuters D...</td>\n",
|
|
|
|
" <td>2013-06-10</td>\n",
|
|
|
|
" <td>2021-07-06</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>1</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>re3data_____::cc3ea05c863cd49af75f7f54e0e86f09</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>r3d100000002</td>\n",
|
|
|
|
" <td>Access to Archival Databases</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>[AAD]</td>\n",
|
|
|
|
" <td>https://aad.archives.gov/aad/</td>\n",
|
|
|
|
" <td>[RRID:SCR_010479, RRID:nlx_157752]</td>\n",
|
|
|
|
" <td>You will find in the Access to Archival Databa...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[disciplinary]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>1985</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[1 Humanities and Social Sciences, 102 History...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>[Images, Standard office documents, Structured...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[dataProvider]</td>\n",
|
|
|
|
" <td>[US History]</td>\n",
|
|
|
|
" <td>[[The U.S. National Archives and Records Admin...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>no</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2012-07-04</td>\n",
|
|
|
|
" <td>2021-05-25</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>2</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>re3data_____::a2f73fbe91311f4356d0d7957c441773</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>r3d100000004</td>\n",
|
|
|
|
" <td>Datenbank Gesprochenes Deutsch</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>[DGD, DGD2 (formerly), Database for Spoken Ger...</td>\n",
|
|
|
|
" <td>https://dgd.ids-mannheim.de/</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>The \"Database for Spoken German (DGD)\" is a co...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[disciplinary]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>34 corpora</td>\n",
|
|
|
|
" <td>2020-02-03</td>\n",
|
|
|
|
" <td>2012</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[1 Humanities and Social Sciences, 104 Linguis...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>[Audiovisual data, Standard office documents, ...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[dataProvider, serviceProvider]</td>\n",
|
|
|
|
" <td>[Australian German, FOLK, German dialects, Pfe...</td>\n",
|
|
|
|
" <td>[[Institut für Deutsche Sprache, Archiv für Ge...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
|
|
|
" <td>2012-07-20</td>\n",
|
|
|
|
" <td>2020-08-27</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>3</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>re3data_____::0394b97eb11f19785cbca1ec830429da</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>r3d100000005</td>\n",
|
|
|
|
" <td>UNC Dataverse</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>[University of North Carolina Dataverse]</td>\n",
|
|
|
|
" <td>https://dataverse.unc.edu/</td>\n",
|
|
|
|
" <td>[]</td>\n",
|
|
|
|
" <td>UNC Dataverse is an open-source repository sof...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[institutional]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>186 dataverses; 25.272 studies; 229.442 files</td>\n",
|
|
|
|
" <td>2020-11-30</td>\n",
|
|
|
|
" <td>2011</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[1 Humanities and Social Sciences, 111 Social ...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>[Archived data, Plain text, Raw data, Scientif...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[dataProvider, serviceProvider]</td>\n",
|
|
|
|
" <td>[FAIR, census, demographic survey, demography,...</td>\n",
|
|
|
|
" <td>[[Odum Institute for Research in Social Scienc...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>The Odum Institute houses one of the oldest an...</td>\n",
|
|
|
|
" <td>2012-07-23</td>\n",
|
|
|
|
" <td>2020-11-30</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>4</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>re3data_____::a48f09c562b247a9919acfe195549b47</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>r3d100000006</td>\n",
|
|
|
|
" <td>Archaeology Data Service</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>[ADS]</td>\n",
|
|
|
|
" <td>https://archaeologydataservice.ac.uk/</td>\n",
|
|
|
|
" <td>[FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg]</td>\n",
|
|
|
|
" <td>The ADS is an accredited digital repository fo...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[disciplinary]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>1837 results</td>\n",
|
|
|
|
" <td>2020-05-20</td>\n",
|
|
|
|
" <td>1996-10-01</td>\n",
|
|
|
|
" <td>NaN</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[1 Humanities and Social Sciences, 101 Ancient...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>[Archived data, Audiovisual data, Databases, I...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[dataProvider, serviceProvider]</td>\n",
|
|
|
|
" <td>[FAIR, archaeology, cultural heritage, prehist...</td>\n",
|
|
|
|
" <td>[[Arts and Humanities Research Council, [AHRC]...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>ADS is covered by Clarivate Data Citation Inde...</td>\n",
|
|
|
|
" <td>2012-07-23</td>\n",
|
|
|
|
" <td>2021-06-11</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2021-07-23 12:38:56 +02:00
|
|
|
" openaire_id re3data_id \\\n",
|
|
|
|
"0 re3data_____::91780fe96da5ba32f804e43359c154ba r3d100000001 \n",
|
|
|
|
"1 re3data_____::cc3ea05c863cd49af75f7f54e0e86f09 r3d100000002 \n",
|
|
|
|
"2 re3data_____::a2f73fbe91311f4356d0d7957c441773 r3d100000004 \n",
|
|
|
|
"3 re3data_____::0394b97eb11f19785cbca1ec830429da r3d100000005 \n",
|
|
|
|
"4 re3data_____::a48f09c562b247a9919acfe195549b47 r3d100000006 \n",
|
|
|
|
"\n",
|
|
|
|
" repository_name \\\n",
|
|
|
|
"0 Odum Institute Archive Dataverse \n",
|
|
|
|
"1 Access to Archival Databases \n",
|
|
|
|
"2 Datenbank Gesprochenes Deutsch \n",
|
|
|
|
"3 UNC Dataverse \n",
|
|
|
|
"4 Archaeology Data Service \n",
|
|
|
|
"\n",
|
|
|
|
" additional_name \\\n",
|
|
|
|
"0 [] \n",
|
|
|
|
"1 [AAD] \n",
|
|
|
|
"2 [DGD, DGD2 (formerly), Database for Spoken Ger... \n",
|
|
|
|
"3 [University of North Carolina Dataverse] \n",
|
|
|
|
"4 [ADS] \n",
|
|
|
|
"\n",
|
|
|
|
" repository_url \\\n",
|
|
|
|
"0 https://dataverse.unc.edu/dataverse/odum \n",
|
|
|
|
"1 https://aad.archives.gov/aad/ \n",
|
|
|
|
"2 https://dgd.ids-mannheim.de/ \n",
|
|
|
|
"3 https://dataverse.unc.edu/ \n",
|
|
|
|
"4 https://archaeologydataservice.ac.uk/ \n",
|
|
|
|
"\n",
|
|
|
|
" repository_id \\\n",
|
|
|
|
"0 [] \n",
|
|
|
|
"1 [RRID:SCR_010479, RRID:nlx_157752] \n",
|
|
|
|
"2 [] \n",
|
|
|
|
"3 [] \n",
|
|
|
|
"4 [FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] \n",
|
|
|
|
"\n",
|
|
|
|
" description type \\\n",
|
|
|
|
"0 The Odum Institute Archive Dataverse contains ... [disciplinary] \n",
|
|
|
|
"1 You will find in the Access to Archival Databa... [disciplinary] \n",
|
|
|
|
"2 The \"Database for Spoken German (DGD)\" is a co... [disciplinary] \n",
|
|
|
|
"3 UNC Dataverse is an open-source repository sof... [institutional] \n",
|
|
|
|
"4 The ADS is an accredited digital repository fo... [disciplinary] \n",
|
|
|
|
"\n",
|
|
|
|
" size update_date start_date \\\n",
|
|
|
|
"0 13 dataverses; 3.050 datasets 2020-12-04 NaN \n",
|
|
|
|
"1 NaN NaN 1985 \n",
|
|
|
|
"2 34 corpora 2020-02-03 2012 \n",
|
|
|
|
"3 186 dataverses; 25.272 studies; 229.442 files 2020-11-30 2011 \n",
|
|
|
|
"4 1837 results 2020-05-20 1996-10-01 \n",
|
|
|
|
"\n",
|
|
|
|
" end_date subject \\\n",
|
|
|
|
"0 NaN [1 Humanities and Social Sciences, 111 Social ... \n",
|
|
|
|
"1 NaN [1 Humanities and Social Sciences, 102 History... \n",
|
|
|
|
"2 NaN [1 Humanities and Social Sciences, 104 Linguis... \n",
|
|
|
|
"3 NaN [1 Humanities and Social Sciences, 111 Social ... \n",
|
|
|
|
"4 NaN [1 Humanities and Social Sciences, 101 Ancient... \n",
|
|
|
|
"\n",
|
|
|
|
" mission_statement content_type \\\n",
|
|
|
|
"0 false [Databases, Plain text, Scientific and statist... \n",
|
|
|
|
"1 true [Images, Standard office documents, Structured... \n",
|
|
|
|
"2 true [Audiovisual data, Standard office documents, ... \n",
|
|
|
|
"3 true [Archived data, Plain text, Raw data, Scientif... \n",
|
|
|
|
"4 true [Archived data, Audiovisual data, Databases, I... \n",
|
2021-07-22 11:35:40 +02:00
|
|
|
"\n",
|
|
|
|
" provider_type \\\n",
|
|
|
|
"0 [dataProvider] \n",
|
|
|
|
"1 [dataProvider] \n",
|
|
|
|
"2 [dataProvider, serviceProvider] \n",
|
|
|
|
"3 [dataProvider, serviceProvider] \n",
|
|
|
|
"4 [dataProvider, serviceProvider] \n",
|
|
|
|
"\n",
|
|
|
|
" keyword \\\n",
|
|
|
|
"0 [FAIR, Middle East, crime, demography, economy... \n",
|
|
|
|
"1 [US History] \n",
|
|
|
|
"2 [Australian German, FOLK, German dialects, Pfe... \n",
|
|
|
|
"3 [FAIR, census, demographic survey, demography,... \n",
|
|
|
|
"4 [FAIR, archaeology, cultural heritage, prehist... \n",
|
|
|
|
"\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" institution policy database_access \\\n",
|
|
|
|
"0 [[Odum Institute for Research in Social Scienc... true true \n",
|
|
|
|
"1 [[The U.S. National Archives and Records Admin... true true \n",
|
|
|
|
"2 [[Institut für Deutsche Sprache, Archiv für Ge... true true \n",
|
|
|
|
"3 [[Odum Institute for Research in Social Scienc... true true \n",
|
|
|
|
"4 [[Arts and Humanities Research Council, [AHRC]... true true \n",
|
|
|
|
"\n",
|
|
|
|
" database_license data_access data_license data_upload data_upload_license \\\n",
|
|
|
|
"0 true true true true false \n",
|
|
|
|
"1 false true true true false \n",
|
|
|
|
"2 false true true true false \n",
|
|
|
|
"3 false true true true true \n",
|
|
|
|
"4 true true true true true \n",
|
|
|
|
"\n",
|
|
|
|
" software versioning api pid_system citation_guideline_url aid_system \\\n",
|
|
|
|
"0 true NaN false true true true \n",
|
|
|
|
"1 true no true true true true \n",
|
|
|
|
"2 true yes false true true true \n",
|
|
|
|
"3 true yes true true true true \n",
|
|
|
|
"4 true yes true true true true \n",
|
|
|
|
"\n",
|
|
|
|
" enhanced_publication quality_management certificate metadata_standard \\\n",
|
|
|
|
"0 unknown yes true true \n",
|
|
|
|
"1 unknown unknown false false \n",
|
|
|
|
"2 unknown unknown true false \n",
|
|
|
|
"3 unknown yes false true \n",
|
|
|
|
"4 unknown yes true true \n",
|
|
|
|
"\n",
|
|
|
|
" syndication remarks entry_date \\\n",
|
|
|
|
"0 false Odum Dataverse is covered by Thomson Reuters D... 2013-06-10 \n",
|
|
|
|
"1 true NaN 2012-07-04 \n",
|
|
|
|
"2 false NaN 2012-07-20 \n",
|
|
|
|
"3 false The Odum Institute houses one of the oldest an... 2012-07-23 \n",
|
|
|
|
"4 true ADS is covered by Clarivate Data Citation Inde... 2012-07-23 \n",
|
|
|
|
"\n",
|
|
|
|
" last_update \n",
|
|
|
|
"0 2021-07-06 \n",
|
|
|
|
"1 2021-05-25 \n",
|
|
|
|
"2 2020-08-27 \n",
|
|
|
|
"3 2020-11-30 \n",
|
|
|
|
"4 2021-06-11 "
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 2,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"# Load the raw re3data export (tab-separated). The columns listed below are\n",
"# run through ast.literal_eval while reading, so their cells arrive as Python\n",
"# objects rather than as the raw text stored in the file.\n",
"re3data_df = pd.read_csv(\n",
"    '../data/raw/re3data.tsv',\n",
"    delimiter='\\t',\n",
"    converters={\n",
"        column: ast.literal_eval\n",
"        for column in (\n",
"            'subject', 'keyword', 'additional_name', 'repository_id',\n",
"            'type', 'content_type', 'provider_type', 'institution',\n",
"        )\n",
"    },\n",
")\n",
"re3data_df.head()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 3,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
2021-07-23 12:38:56 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"Index(['openaire_id', 're3data_id', 'repository_name', 'additional_name',\n",
|
|
|
|
" 'repository_url', 'repository_id', 'description', 'type', 'size',\n",
|
|
|
|
" 'update_date', 'start_date', 'end_date', 'subject', 'mission_statement',\n",
|
|
|
|
" 'content_type', 'provider_type', 'keyword', 'institution', 'policy',\n",
|
|
|
|
" 'database_access', 'database_license', 'data_access', 'data_license',\n",
|
|
|
|
" 'data_upload', 'data_upload_license', 'software', 'versioning', 'api',\n",
|
|
|
|
" 'pid_system', 'citation_guideline_url', 'aid_system',\n",
|
|
|
|
" 'enhanced_publication', 'quality_management', 'certificate',\n",
|
|
|
|
" 'metadata_standard', 'syndication', 'remarks', 'entry_date',\n",
|
|
|
|
" 'last_update'],\n",
|
|
|
|
" dtype='object')"
|
|
|
|
]
|
|
|
|
},
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 3,
|
2021-07-23 12:38:56 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
2021-07-22 11:35:40 +02:00
|
|
|
"source": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"# List every column name of the loaded frame to see which fields are available\n",
"re3data_df.columns"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"def empty_list_is_nan(cell):\n",
"    \"\"\"Map an empty list to NaN and pass every other value through unchanged.\n",
"\n",
"    Intended for element-wise use on a DataFrame so that cells holding ``[]``\n",
"    are represented as missing values.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    cell : object\n",
"        A single cell value of any type.\n",
"\n",
"    Returns\n",
"    -------\n",
"    object\n",
"        ``np.nan`` when ``cell`` is a list of length 0; otherwise ``cell``\n",
"        itself (non-empty lists and all non-list values are returned as-is).\n",
"    \"\"\"\n",
"    # Only true list instances are inspected; other empty containers\n",
"    # (tuples, strings, dicts) are deliberately left untouched.\n",
"    if isinstance(cell, list) and len(cell) == 0:\n",
"        return np.nan\n",
"    return cell\n",
|
|
|
|
" \n",
|
|
|
|
"# Replace every empty-list cell with NaN across the whole frame. Re-running this\n",
"# cell is harmless: cells that are already NaN are returned unchanged.\n",
"# NOTE(review): DataFrame.applymap is deprecated in newer pandas releases in\n",
"# favour of DataFrame.map - confirm the installed version before switching.\n",
"re3data_df = re3data_df.applymap(empty_list_is_nan)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 5,
|
2021-07-23 12:38:56 +02:00
|
|
|
"metadata": {},
|
2021-07-22 11:35:40 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/html": [
|
|
|
|
"<div>\n",
|
|
|
|
"<style scoped>\n",
|
|
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
" vertical-align: middle;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe tbody tr th {\n",
|
|
|
|
" vertical-align: top;\n",
|
|
|
|
" }\n",
|
|
|
|
"\n",
|
|
|
|
" .dataframe thead th {\n",
|
|
|
|
" text-align: right;\n",
|
|
|
|
" }\n",
|
|
|
|
"</style>\n",
|
|
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
" <thead>\n",
|
|
|
|
" <tr style=\"text-align: right;\">\n",
|
|
|
|
" <th></th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>openaire_id</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>re3data_id</th>\n",
|
|
|
|
" <th>repository_name</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>additional_name</th>\n",
|
|
|
|
" <th>repository_url</th>\n",
|
|
|
|
" <th>repository_id</th>\n",
|
|
|
|
" <th>description</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>type</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>size</th>\n",
|
|
|
|
" <th>update_date</th>\n",
|
|
|
|
" <th>start_date</th>\n",
|
|
|
|
" <th>end_date</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>subject</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>mission_statement</th>\n",
|
|
|
|
" <th>content_type</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <th>provider_type</th>\n",
|
|
|
|
" <th>keyword</th>\n",
|
|
|
|
" <th>institution</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <th>policy</th>\n",
|
|
|
|
" <th>database_access</th>\n",
|
|
|
|
" <th>database_license</th>\n",
|
|
|
|
" <th>data_access</th>\n",
|
|
|
|
" <th>data_license</th>\n",
|
|
|
|
" <th>data_upload</th>\n",
|
|
|
|
" <th>data_upload_license</th>\n",
|
|
|
|
" <th>software</th>\n",
|
|
|
|
" <th>versioning</th>\n",
|
|
|
|
" <th>api</th>\n",
|
|
|
|
" <th>pid_system</th>\n",
|
|
|
|
" <th>citation_guideline_url</th>\n",
|
|
|
|
" <th>aid_system</th>\n",
|
|
|
|
" <th>enhanced_publication</th>\n",
|
|
|
|
" <th>quality_management</th>\n",
|
|
|
|
" <th>certificate</th>\n",
|
|
|
|
" <th>metadata_standard</th>\n",
|
|
|
|
" <th>syndication</th>\n",
|
|
|
|
" <th>remarks</th>\n",
|
|
|
|
" <th>entry_date</th>\n",
|
|
|
|
" <th>last_update</th>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </thead>\n",
|
|
|
|
" <tbody>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>count</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>2137</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2686</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>829</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2707</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>2677</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>1260</td>\n",
|
|
|
|
" <td>1248</td>\n",
|
|
|
|
" <td>1762</td>\n",
|
|
|
|
" <td>146</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>2685</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2707</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>2700</td>\n",
|
|
|
|
" <td>2699</td>\n",
|
|
|
|
" <td>2699</td>\n",
|
|
|
|
" <td>2706</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>1292</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2704</td>\n",
|
|
|
|
" <td>2705</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>1637</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>unique</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2704</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>2128</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2683</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>828</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2705</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>8</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>1233</td>\n",
|
|
|
|
" <td>687</td>\n",
|
|
|
|
" <td>351</td>\n",
|
|
|
|
" <td>79</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>1367</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>1323</td>\n",
|
|
|
|
" <td>4</td>\n",
|
|
|
|
" <td>2474</td>\n",
|
|
|
|
" <td>2685</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>1</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1632</td>\n",
|
|
|
|
" <td>1259</td>\n",
|
|
|
|
" <td>814</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>top</th>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>re3data_____::4cea5a5ea78542232a51190879756661</td>\n",
|
|
|
|
" <td>r3d100011254</td>\n",
|
|
|
|
" <td>EarthChem Library</td>\n",
|
|
|
|
" <td>[IRIS]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>http://www.jcvi.org/cms/home/</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>[doi:10.17171/1-6]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>The repository is no longer available. >>>!!!<...</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[disciplinary]</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2 datasets</td>\n",
|
|
|
|
" <td>2019-05-15</td>\n",
|
|
|
|
" <td>2008</td>\n",
|
|
|
|
" <td>2015</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[1 Humanities and Social Sciences, 2 Life Scie...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>[Standard office documents]</td>\n",
|
|
|
|
" <td>[dataProvider]</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>[multidisciplinary]</td>\n",
|
|
|
|
" <td>[[National Center for Biotechnology Informatio...</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>true</td>\n",
|
|
|
|
" <td>unknown</td>\n",
|
|
|
|
" <td>yes</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>false</td>\n",
|
|
|
|
" <td>The National Institute of Standards and Techno...</td>\n",
|
|
|
|
" <td>2016-05-10</td>\n",
|
|
|
|
" <td>2021-07-02</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" <tr>\n",
|
|
|
|
" <th>freq</th>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>1</td>\n",
|
|
|
|
" <td>1</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>2</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2</td>\n",
|
|
|
|
" <td>1713</td>\n",
|
|
|
|
" <td>6</td>\n",
|
|
|
|
" <td>15</td>\n",
|
|
|
|
" <td>92</td>\n",
|
|
|
|
" <td>11</td>\n",
|
|
|
|
" <td>222</td>\n",
|
|
|
|
" <td>2286</td>\n",
|
|
|
|
" <td>30</td>\n",
|
|
|
|
" <td>1748</td>\n",
|
|
|
|
" <td>190</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" <td>6</td>\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
" <td>2394</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2134</td>\n",
|
|
|
|
" <td>2701</td>\n",
|
|
|
|
" <td>2693</td>\n",
|
|
|
|
" <td>2681</td>\n",
|
|
|
|
" <td>1988</td>\n",
|
|
|
|
" <td>2227</td>\n",
|
|
|
|
" <td>1086</td>\n",
|
|
|
|
" <td>1485</td>\n",
|
|
|
|
" <td>2448</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>2707</td>\n",
|
|
|
|
" <td>1592</td>\n",
|
|
|
|
" <td>1492</td>\n",
|
|
|
|
" <td>2481</td>\n",
|
|
|
|
" <td>1655</td>\n",
|
|
|
|
" <td>2129</td>\n",
|
|
|
|
" <td>3</td>\n",
|
|
|
|
" <td>20</td>\n",
|
|
|
|
" <td>47</td>\n",
|
2021-07-22 11:35:40 +02:00
|
|
|
" </tr>\n",
|
|
|
|
" </tbody>\n",
|
|
|
|
"</table>\n",
|
|
|
|
"</div>"
|
|
|
|
],
|
|
|
|
"text/plain": [
|
2021-07-23 12:38:56 +02:00
|
|
|
" openaire_id re3data_id \\\n",
|
|
|
|
"count 2707 2707 \n",
|
|
|
|
"unique 2707 2707 \n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"top re3data_____::4cea5a5ea78542232a51190879756661 r3d100011254 \n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"freq 1 1 \n",
|
|
|
|
"\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
" repository_name additional_name repository_url \\\n",
|
|
|
|
"count 2707 2137 2686 \n",
|
|
|
|
"unique 2704 2128 2683 \n",
|
|
|
|
"top EarthChem Library [IRIS] http://www.jcvi.org/cms/home/ \n",
|
|
|
|
"freq 2 2 2 \n",
|
|
|
|
"\n",
|
|
|
|
" repository_id description \\\n",
|
|
|
|
"count 829 2707 \n",
|
|
|
|
"unique 828 2705 \n",
|
|
|
|
"top [doi:10.17171/1-6] The repository is no longer available. >>>!!!<... \n",
|
|
|
|
"freq 2 2 \n",
|
|
|
|
"\n",
|
|
|
|
" type size update_date start_date end_date \\\n",
|
|
|
|
"count 2677 1260 1248 1762 146 \n",
|
|
|
|
"unique 8 1233 687 351 79 \n",
|
|
|
|
"top [disciplinary] 2 datasets 2019-05-15 2008 2015 \n",
|
|
|
|
"freq 1713 6 15 92 11 \n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"\n",
|
|
|
|
" subject mission_statement \\\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"count 2685 2707 \n",
|
|
|
|
"unique 1367 2 \n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"top [1 Humanities and Social Sciences, 2 Life Scie... true \n",
|
|
|
|
"freq 222 2286 \n",
|
|
|
|
"\n",
|
|
|
|
" content_type provider_type keyword \\\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"count 2700 2699 2699 \n",
|
|
|
|
"unique 1323 4 2474 \n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"top [Standard office documents] [dataProvider] [multidisciplinary] \n",
|
|
|
|
"freq 30 1748 190 \n",
|
|
|
|
"\n",
|
|
|
|
" institution policy \\\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"count 2706 2707 \n",
|
|
|
|
"unique 2685 2 \n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"top [[National Center for Biotechnology Informatio... true \n",
|
|
|
|
"freq 6 2394 \n",
|
|
|
|
"\n",
|
|
|
|
" database_access database_license data_access data_license data_upload \\\n",
|
|
|
|
"count 2707 2707 2707 2707 2707 \n",
|
|
|
|
"unique 1 2 2 2 2 \n",
|
|
|
|
"top true false true true true \n",
|
|
|
|
"freq 2707 2134 2701 2693 2681 \n",
|
|
|
|
"\n",
|
|
|
|
" data_upload_license software versioning api pid_system \\\n",
|
|
|
|
"count 2707 2707 1292 2707 2707 \n",
|
|
|
|
"unique 2 2 2 2 2 \n",
|
|
|
|
"top false true yes false true \n",
|
|
|
|
"freq 1988 2227 1086 1485 2448 \n",
|
|
|
|
"\n",
|
|
|
|
" citation_guideline_url aid_system enhanced_publication \\\n",
|
|
|
|
"count 2707 2707 2704 \n",
|
|
|
|
"unique 1 1 3 \n",
|
|
|
|
"top true true unknown \n",
|
|
|
|
"freq 2707 2707 1592 \n",
|
|
|
|
"\n",
|
|
|
|
" quality_management certificate metadata_standard syndication \\\n",
|
|
|
|
"count 2705 2707 2707 2707 \n",
|
|
|
|
"unique 3 2 2 2 \n",
|
|
|
|
"top yes false false false \n",
|
|
|
|
"freq 1492 2481 1655 2129 \n",
|
|
|
|
"\n",
|
|
|
|
" remarks entry_date \\\n",
|
|
|
|
"count 1637 2707 \n",
|
|
|
|
"unique 1632 1259 \n",
|
|
|
|
"top The National Institute of Standards and Techno... 2016-05-10 \n",
|
|
|
|
"freq 3 20 \n",
|
|
|
|
"\n",
|
|
|
|
" last_update \n",
|
|
|
|
"count 2707 \n",
|
|
|
|
"unique 814 \n",
|
|
|
|
"top 2021-07-02 \n",
|
|
|
|
"freq 47 "
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 5,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"re3data_df.describe(include='all')"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 6,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"openaire_id 0\n",
|
|
|
|
"re3data_id 0\n",
|
|
|
|
"repository_name 0\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"additional_name 570\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"repository_url 21\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"repository_id 1878\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"description 0\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"type 30\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"size 1447\n",
|
|
|
|
"update_date 1459\n",
|
|
|
|
"start_date 945\n",
|
|
|
|
"end_date 2561\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"subject 22\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"mission_statement 0\n",
|
2021-07-23 15:28:23 +02:00
|
|
|
"content_type 7\n",
|
|
|
|
"provider_type 8\n",
|
|
|
|
"keyword 8\n",
|
|
|
|
"institution 1\n",
|
2021-07-23 12:38:56 +02:00
|
|
|
"policy 0\n",
|
|
|
|
"database_access 0\n",
|
|
|
|
"database_license 0\n",
|
|
|
|
"data_access 0\n",
|
|
|
|
"data_license 0\n",
|
|
|
|
"data_upload 0\n",
|
|
|
|
"data_upload_license 0\n",
|
|
|
|
"software 0\n",
|
|
|
|
"versioning 1415\n",
|
|
|
|
"api 0\n",
|
|
|
|
"pid_system 0\n",
|
|
|
|
"citation_guideline_url 0\n",
|
|
|
|
"aid_system 0\n",
|
|
|
|
"enhanced_publication 3\n",
|
|
|
|
"quality_management 2\n",
|
|
|
|
"certificate 0\n",
|
|
|
|
"metadata_standard 0\n",
|
|
|
|
"syndication 0\n",
|
|
|
|
"remarks 1070\n",
|
|
|
|
"entry_date 0\n",
|
|
|
|
"last_update 0\n",
|
|
|
|
"dtype: int64"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 6,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"re3data_df.isna().sum()"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 7,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"array(['Databases', 'Plain text',\n",
|
|
|
|
" 'Scientific and statistical data formats',\n",
|
|
|
|
" 'Standard office documents', 'other', 'Images', 'Structured text',\n",
|
|
|
|
" 'Audiovisual data', 'Archived data', 'Raw data',\n",
|
|
|
|
" 'Software applications', 'Source code', 'Structured graphics',\n",
|
|
|
|
" 'Configuration data', 'Networkbased data', nan], dtype=object)"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 7,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"re3data_df.content_type.explode().unique()"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
2021-07-23 12:38:56 +02:00
|
|
|
"cell_type": "code",
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 8,
|
2021-07-22 11:35:40 +02:00
|
|
|
"metadata": {},
|
2021-07-23 12:38:56 +02:00
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"data": {
|
|
|
|
"text/plain": [
|
|
|
|
"array(['dataProvider', 'serviceProvider', nan], dtype=object)"
|
|
|
|
]
|
|
|
|
},
|
2021-07-23 15:28:23 +02:00
|
|
|
"execution_count": 8,
|
2021-07-23 12:38:56 +02:00
|
|
|
"metadata": {},
|
|
|
|
"output_type": "execute_result"
|
|
|
|
}
|
|
|
|
],
|
2021-07-22 11:35:40 +02:00
|
|
|
"source": [
|
2021-07-23 12:38:56 +02:00
|
|
|
"re3data_df.provider_type.explode().unique()"
|
2021-07-22 11:35:40 +02:00
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.8.3"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 4
|
|
|
|
}
|