{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Information to check\n", "- names\n", "- description\n", "- url\n", "- subjects & keywords\n", "- content type\n", "- repo type\n", "- policies\n", "\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px\n", "\n", "pd.set_option('display.max_columns', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Loading dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | openaire_id | \n", "re3data_id | \n", "repository_name | \n", "additional_name | \n", "repository_url | \n", "repository_id | \n", "description | \n", "type | \n", "size | \n", "update_date | \n", "start_date | \n", "end_date | \n", "subject | \n", "mission_statement | \n", "content_type | \n", "provider_type | \n", "keyword | \n", "institution | \n", "policy | \n", "database_access | \n", "database_license | \n", "data_access | \n", "data_license | \n", "data_upload | \n", "data_upload_license | \n", "software | \n", "versioning | \n", "api | \n", "pid_system | \n", "citation_guideline_url | \n", "aid_system | \n", "enhanced_publication | \n", "quality_management | \n", "certificate | \n", "metadata_standard | \n", "syndication | \n", "remarks | \n", "entry_date | \n", "last_update | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "re3data_____::91780fe96da5ba32f804e43359c154ba | \n", "r3d100000001 | \n", "Odum Institute Archive Dataverse | \n", "[] | \n", "https://dataverse.unc.edu/dataverse/odum | \n", "[] | \n", "The Odum Institute Archive Dataverse contains ... | \n", "[disciplinary] | \n", "13 dataverses; 3.050 datasets | \n", "2020-12-04 | \n", "NaN | \n", "NaN | \n", "[1 Humanities and Social Sciences, 111 Social ... | \n", "false | \n", "[Databases, Plain text, Scientific and statist... | \n", "[dataProvider] | \n", "[FAIR, Middle East, crime, demography, economy... | \n", "[[Odum Institute for Research in Social Scienc... | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "false | \n", "true | \n", "NaN | \n", "false | \n", "true | \n", "true | \n", "true | \n", "unknown | \n", "yes | \n", "true | \n", "true | \n", "false | \n", "Odum Dataverse is covered by Thomson Reuters D... | \n", "2013-06-10 | \n", "2021-07-06 | \n", "
1 | \n", "re3data_____::cc3ea05c863cd49af75f7f54e0e86f09 | \n", "r3d100000002 | \n", "Access to Archival Databases | \n", "[AAD] | \n", "https://aad.archives.gov/aad/ | \n", "[RRID:SCR_010479, RRID:nlx_157752] | \n", "You will find in the Access to Archival Databa... | \n", "[disciplinary] | \n", "NaN | \n", "NaN | \n", "1985 | \n", "NaN | \n", "[1 Humanities and Social Sciences, 102 History... | \n", "true | \n", "[Images, Standard office documents, Structured... | \n", "[dataProvider] | \n", "[US History] | \n", "[[The U.S. National Archives and Records Admin... | \n", "true | \n", "true | \n", "false | \n", "true | \n", "true | \n", "true | \n", "false | \n", "true | \n", "no | \n", "true | \n", "true | \n", "true | \n", "true | \n", "unknown | \n", "unknown | \n", "false | \n", "false | \n", "true | \n", "NaN | \n", "2012-07-04 | \n", "2021-05-25 | \n", "
2 | \n", "re3data_____::a2f73fbe91311f4356d0d7957c441773 | \n", "r3d100000004 | \n", "Datenbank Gesprochenes Deutsch | \n", "[DGD, DGD2 (formerly), Database for Spoken Ger... | \n", "https://dgd.ids-mannheim.de/ | \n", "[] | \n", "The \"Database for Spoken German (DGD)\" is a co... | \n", "[disciplinary] | \n", "34 corpora | \n", "2020-02-03 | \n", "2012 | \n", "NaN | \n", "[1 Humanities and Social Sciences, 104 Linguis... | \n", "true | \n", "[Audiovisual data, Standard office documents, ... | \n", "[dataProvider, serviceProvider] | \n", "[Australian German, FOLK, German dialects, Pfe... | \n", "[[Institut für Deutsche Sprache, Archiv für Ge... | \n", "true | \n", "true | \n", "false | \n", "true | \n", "true | \n", "true | \n", "false | \n", "true | \n", "yes | \n", "false | \n", "true | \n", "true | \n", "true | \n", "unknown | \n", "unknown | \n", "true | \n", "false | \n", "false | \n", "NaN | \n", "2012-07-20 | \n", "2020-08-27 | \n", "
3 | \n", "re3data_____::0394b97eb11f19785cbca1ec830429da | \n", "r3d100000005 | \n", "UNC Dataverse | \n", "[University of North Carolina Dataverse] | \n", "https://dataverse.unc.edu/ | \n", "[] | \n", "UNC Dataverse is an open-source repository sof... | \n", "[institutional] | \n", "186 dataverses; 25.272 studies; 229.442 files | \n", "2020-11-30 | \n", "2011 | \n", "NaN | \n", "[1 Humanities and Social Sciences, 111 Social ... | \n", "true | \n", "[Archived data, Plain text, Raw data, Scientif... | \n", "[dataProvider, serviceProvider] | \n", "[FAIR, census, demographic survey, demography,... | \n", "[[Odum Institute for Research in Social Scienc... | \n", "true | \n", "true | \n", "false | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "yes | \n", "true | \n", "true | \n", "true | \n", "true | \n", "unknown | \n", "yes | \n", "false | \n", "true | \n", "false | \n", "The Odum Institute houses one of the oldest an... | \n", "2012-07-23 | \n", "2020-11-30 | \n", "
4 | \n", "re3data_____::a48f09c562b247a9919acfe195549b47 | \n", "r3d100000006 | \n", "Archaeology Data Service | \n", "[ADS] | \n", "https://archaeologydataservice.ac.uk/ | \n", "[FAIRsharing_doi:10.25504/FAIRsharing.hm1mfg] | \n", "The ADS is an accredited digital repository fo... | \n", "[disciplinary] | \n", "1837 results | \n", "2020-05-20 | \n", "1996-10-01 | \n", "NaN | \n", "[1 Humanities and Social Sciences, 101 Ancient... | \n", "true | \n", "[Archived data, Audiovisual data, Databases, I... | \n", "[dataProvider, serviceProvider] | \n", "[FAIR, archaeology, cultural heritage, prehist... | \n", "[[Arts and Humanities Research Council, [AHRC]... | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "true | \n", "yes | \n", "true | \n", "true | \n", "true | \n", "true | \n", "unknown | \n", "yes | \n", "true | \n", "true | \n", "true | \n", "ADS is covered by Clarivate Data Citation Inde... | \n", "2012-07-23 | \n", "2021-06-11 | \n", "
\n", " | openaire_id | \n", "re3data_id | \n", "repository_name | \n", "additional_name | \n", "repository_url | \n", "repository_id | \n", "description | \n", "type | \n", "size | \n", "update_date | \n", "start_date | \n", "end_date | \n", "subject | \n", "mission_statement | \n", "content_type | \n", "provider_type | \n", "keyword | \n", "institution | \n", "policy | \n", "database_access | \n", "database_license | \n", "data_access | \n", "data_license | \n", "data_upload | \n", "data_upload_license | \n", "software | \n", "versioning | \n", "api | \n", "pid_system | \n", "citation_guideline_url | \n", "aid_system | \n", "enhanced_publication | \n", "quality_management | \n", "certificate | \n", "metadata_standard | \n", "syndication | \n", "remarks | \n", "entry_date | \n", "last_update | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2137 | \n", "2686 | \n", "829 | \n", "2707 | \n", "2677 | \n", "1260 | \n", "1248 | \n", "1762 | \n", "146 | \n", "2685 | \n", "2707 | \n", "2700 | \n", "2699 | \n", "2699 | \n", "2706 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "1292 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "2704 | \n", "2705 | \n", "2707 | \n", "2707 | \n", "2707 | \n", "1637 | \n", "2707 | \n", "2707 | \n", "
unique | \n", "2707 | \n", "2707 | \n", "2704 | \n", "2128 | \n", "2683 | \n", "828 | \n", "2705 | \n", "8 | \n", "1233 | \n", "687 | \n", "351 | \n", "79 | \n", "1367 | \n", "2 | \n", "1323 | \n", "4 | \n", "2474 | \n", "2685 | \n", "2 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1 | \n", "1 | \n", "3 | \n", "3 | \n", "2 | \n", "2 | \n", "2 | \n", "1632 | \n", "1259 | \n", "814 | \n", "
top | \n", "re3data_____::4cea5a5ea78542232a51190879756661 | \n", "r3d100011254 | \n", "EarthChem Library | \n", "[IRIS] | \n", "http://www.jcvi.org/cms/home/ | \n", "[doi:10.17171/1-6] | \n", "The repository is no longer available. >>>!!!<... | \n", "[disciplinary] | \n", "2 datasets | \n", "2019-05-15 | \n", "2008 | \n", "2015 | \n", "[1 Humanities and Social Sciences, 2 Life Scie... | \n", "true | \n", "[Standard office documents] | \n", "[dataProvider] | \n", "[multidisciplinary] | \n", "[[National Center for Biotechnology Informatio... | \n", "true | \n", "true | \n", "false | \n", "true | \n", "true | \n", "true | \n", "false | \n", "true | \n", "yes | \n", "false | \n", "true | \n", "true | \n", "true | \n", "unknown | \n", "yes | \n", "false | \n", "false | \n", "false | \n", "The National Institute of Standards and Techno... | \n", "2016-05-10 | \n", "2021-07-02 | \n", "
freq | \n", "1 | \n", "1 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "2 | \n", "1713 | \n", "6 | \n", "15 | \n", "92 | \n", "11 | \n", "222 | \n", "2286 | \n", "30 | \n", "1748 | \n", "190 | \n", "6 | \n", "2394 | \n", "2707 | \n", "2134 | \n", "2701 | \n", "2693 | \n", "2681 | \n", "1988 | \n", "2227 | \n", "1086 | \n", "1485 | \n", "2448 | \n", "2707 | \n", "2707 | \n", "1592 | \n", "1492 | \n", "2481 | \n", "1655 | \n", "2129 | \n", "3 | \n", "20 | \n", "47 | \n", "