diff --git a/notebooks/01.1-exploration-fairsharing.ipynb b/notebooks/01.1-exploration-fairsharing.ipynb
new file mode 100644
index 0000000..1c75245
--- /dev/null
+++ b/notebooks/01.1-exploration-fairsharing.ipynb
@@ -0,0 +1,2089 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import ast\n",
+ "import csv\n",
+ "import json\n",
+ "\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "\n",
+ "import plotly\n",
+ "from plotly.offline import iplot, init_notebook_mode\n",
+ "import plotly.graph_objs as go\n",
+ "import plotly.express as px\n",
+ "\n",
+ "pd.set_option('display.max_columns', None)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Loading datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
" 10001 | \n",
" {nan, 20} | \n",
- " {archive, nan} | \n",
+ " {nan, archive} | \n",
" {nan, 91} | \n",
" {nan} | \n",
" {nan} | \n",
" {nan, disk0/00/01/00/01} | \n",
- " {nan, 2015-08-08 14:52:11} | \n",
+ " {2015-08-08 14:52:11, nan} | \n",
" {nan, 2016-03-21 19:44:01} | \n",
- " {nan, 2015-08-08 14:52:11} | \n",
- " {nan, subject} | \n",
+ " {2015-08-08 14:52:11, nan} | \n",
+ " {subject, nan} | \n",
" {nan} | \n",
" {nan} | \n",
" {nan, show} | \n",
@@ -2210,16 +2210,16 @@
" {nan} | \n",
" {nan, http://edoc.sub.uni-hamburg.de/klimawand... | \n",
" {nan, Klimawandel Dokumentenserver} | \n",
- " {http://edoc.sub.uni-hamburg.de/klimawandel/oa... | \n",
+ " {nan, http://edoc.sub.uni-hamburg.de/klimawand... | \n",
" {nan} | \n",
" {nan} | \n",
" {nan} | \n",
- " {nan, The \"Documentenserver Klimawandel\" (Repo... | \n",
+ " {The \"Documentenserver Klimawandel\" (Repositor... | \n",
" {nan, TRUE} | \n",
" {nan, TRUE} | \n",
" {nan, TRUE} | \n",
- " {nan, Climate Service Center 2.0, Helmholtz-Ze... | \n",
- " {nan, http://www.climateservicecenter.de/, htt... | \n",
+ " {KLIMZUG projects, nan, Climate Service Center... | \n",
+ " {http://www.climateservicecenter.de/, http://w... | \n",
" {nan, de} | \n",
" {nan, Hamburg} | \n",
" {nan, 53.5511} | \n",
@@ -2227,7 +2227,7 @@
" {nan, opus} | \n",
" {nan, geoname_2_DE} | \n",
" {nan, other} | \n",
- " {G1, GE, HD, S1, GF} | \n",
+ " {GF, GE, G1, HD, S1} | \n",
" {nan, 2015-07-02 08:08:31} | \n",
" {nan} | \n",
" {nan} | \n",
@@ -2240,8 +2240,8 @@
" {nan} | \n",
" {nan} | \n",
" {nan} | \n",
- " {nan, opendoar, celestial} | \n",
- " {3408, nan, 5881} | \n",
+ " {nan, celestial, opendoar} | \n",
+ " {nan, 5881, 3408} | \n",
" {nan} | \n",
" {nan} | \n",
" {nan} | \n",
@@ -2338,34 +2338,34 @@
"text/plain": [
" rev_number eprint_status userid importid source \\\n",
"eprintid \n",
- "1 {nan, 633} {archive, nan} {1, nan} {nan} {nan} \n",
- "10 {nan, 511} {archive, nan} {1, nan} {nan} {nan} \n",
+ "1 {nan, 633} {nan, archive} {nan, 1} {nan} {nan} \n",
+ "10 {nan, 511} {nan, archive} {nan, 1} {nan} {nan} \n",
"1000 {274} {archive} {1} {nan} {nan} \n",
- "10001 {nan, 20} {archive, nan} {nan, 91} {nan} {nan} \n",
+ "10001 {nan, 20} {nan, archive} {nan, 91} {nan} {nan} \n",
"10008 {11} {archive} {404} {nan} {nan} \n",
"\n",
" dir datestamp \\\n",
"eprintid \n",
"1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n",
- "10 {disk0/00/00/00/10, nan} {nan, 2010-01-06 13:43:48} \n",
+ "10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n",
"1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n",
- "10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n",
+ "10001 {nan, disk0/00/01/00/01} {2015-08-08 14:52:11, nan} \n",
"10008 {disk0/00/01/00/08} {2015-08-08 14:52:26} \n",
"\n",
" lastmod status_changed \\\n",
"eprintid \n",
"1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n",
- "10 {2011-07-18 05:40:13, nan} {nan, 2010-01-06 13:43:48} \n",
+ "10 {nan, 2011-07-18 05:40:13} {nan, 2010-01-06 13:43:48} \n",
"1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n",
- "10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n",
+ "10001 {nan, 2016-03-21 19:44:01} {2015-08-08 14:52:11, nan} \n",
"10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n",
"\n",
" type succeeds commentary metadata_visibility \\\n",
"eprintid \n",
- "1 {nan, subject} {nan} {nan} {nan, show} \n",
+ "1 {subject, nan} {nan} {nan} {nan, show} \n",
"10 {nan, institutional} {nan} {nan} {nan, show} \n",
"1000 {subject} {nan} {nan} {show} \n",
- "10001 {nan, subject} {nan} {nan} {nan, show} \n",
+ "10001 {subject, nan} {nan} {nan} {nan, show} \n",
"10008 {institutional} {nan} {nan} {show} \n",
"\n",
" latitude longitude relation_type relation_uri item_issues_id \\\n",
@@ -2419,9 +2419,9 @@
" oai_pmh sword_endpoint \\\n",
"eprintid \n",
"1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n",
- "10 {nan, http://www.diva-portal.org/oai/mdh/OAI} {nan} \n",
+ "10 {http://www.diva-portal.org/oai/mdh/OAI, nan} {nan} \n",
"1000 {nan} {nan} \n",
- "10001 {http://edoc.sub.uni-hamburg.de/klimawandel/oa... {nan} \n",
+ "10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n",
"10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n",
"\n",
" rss_feed twitter_feed \\\n",
@@ -2437,7 +2437,7 @@
"1 {nan} {nan} \n",
"10 {nan} {nan, TRUE} \n",
"1000 {nan} {TRUE} \n",
- "10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {nan, TRUE} \n",
+ "10001 {The \"Documentenserver Klimawandel\" (Repositor... {nan, TRUE} \n",
"10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n",
"\n",
" open_access mandate \\\n",
@@ -2453,21 +2453,21 @@
"1 {nan} \n",
"10 {nan} \n",
"1000 {nan} \n",
- "10001 {nan, Climate Service Center 2.0, Helmholtz-Ze... \n",
+ "10001 {KLIMZUG projects, nan, Climate Service Center... \n",
"10008 {Skidmore College} \n",
"\n",
" organisation_home_page location_country \\\n",
"eprintid \n",
"1 {nan} {nan, fr} \n",
- "10 {nan} {nan, se} \n",
+ "10 {nan} {se, nan} \n",
"1000 {nan} {pt} \n",
- "10001 {nan, http://www.climateservicecenter.de/, htt... {nan, de} \n",
+ "10001 {http://www.climateservicecenter.de/, http://w... {nan, de} \n",
"10008 {http://www.skidmore.edu/} {us} \n",
"\n",
" location_city location_latitude location_longitude \\\n",
"eprintid \n",
"1 {nan} {nan} {nan} \n",
- "10 {Uppsala, nan} {nan, 59.8667} {17.6333, nan} \n",
+ "10 {nan, Uppsala} {nan, 59.8667} {nan, 17.6333} \n",
"1000 {Bellevue, WA} {47.6034} {-122.155} \n",
"10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n",
"10008 {Saratoga Springs} {43.0961} {-73.7818} \n",
@@ -2485,7 +2485,7 @@
"1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n",
"10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n",
"1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n",
- "10001 {G1, GE, HD, S1, GF} {nan, 2015-07-02 08:08:31} {nan} {nan} \n",
+ "10001 {GF, GE, G1, HD, S1} {nan, 2015-07-02 08:08:31} {nan} {nan} \n",
"10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n",
"\n",
" activity_low activity_medium activity_high recordcount \\\n",
@@ -2514,10 +2514,10 @@
"\n",
" registry_name registry_id submit_to \\\n",
"eprintid \n",
- "1 {opendoar, celestial} {669, 58} {nan} \n",
- "10 {opendoar, celestial} {258, 526} {nan} \n",
+ "1 {celestial, opendoar} {669, 58} {nan} \n",
+ "10 {celestial, opendoar} {526, 258} {nan} \n",
"1000 {nan} {nan} {nan} \n",
- "10001 {nan, opendoar, celestial} {3408, nan, 5881} {nan} \n",
+ "10001 {nan, celestial, opendoar} {nan, 5881, 3408} {nan} \n",
"10008 {celestial} {5882} {nan} \n",
"\n",
" submitted_to_name submitted_to_done webometrics_rank \\\n",
@@ -2798,7 +2798,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
+ " [celestial, opendoar] | \n",
" [669, 58] | \n",
" NaN | \n",
" NaN | \n",
@@ -2875,8 +2875,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
- " [258, 526] | \n",
+ " [celestial, opendoar] | \n",
+ " [526, 258] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -3007,7 +3007,7 @@
" TRUE | \n",
" TRUE | \n",
" TRUE | \n",
- " [Climate Service Center 2.0, Helmholtz-Zentrum... | \n",
+ " [KLIMZUG projects, Climate Service Center 2.0,... | \n",
" [http://www.climateservicecenter.de/, http://w... | \n",
" de | \n",
" Hamburg | \n",
@@ -3016,7 +3016,7 @@
" opus | \n",
" geoname_2_DE | \n",
" other | \n",
- " [G1, S1, GF, GE, HD] | \n",
+ " [GE, GF, G1, HD, S1] | \n",
" 2015-07-02 08:08:31 | \n",
" NaN | \n",
" NaN | \n",
@@ -3029,8 +3029,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial] | \n",
- " [3408, 5881] | \n",
+ " [celestial, opendoar] | \n",
+ " [5881, 3408] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -3234,7 +3234,7 @@
"1 NaN \n",
"10 NaN \n",
"1000 NaN \n",
- "10001 [Climate Service Center 2.0, Helmholtz-Zentrum... \n",
+ "10001 [KLIMZUG projects, Climate Service Center 2.0,... \n",
"10008 Skidmore College \n",
"\n",
" organisation_home_page location_country \\\n",
@@ -3258,7 +3258,7 @@
"1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n",
"10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n",
"1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n",
- "10001 geoname_2_DE other [G1, S1, GF, GE, HD] 2015-07-02 08:08:31 \n",
+ "10001 geoname_2_DE other [GE, GF, G1, HD, S1] 2015-07-02 08:08:31 \n",
"10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n",
"\n",
" note suggestions activity_low activity_medium activity_high \\\n",
@@ -3287,10 +3287,10 @@
"\n",
" registry_name registry_id submit_to submitted_to_name \\\n",
"eprintid \n",
- "1 [opendoar, celestial] [669, 58] NaN NaN \n",
- "10 [opendoar, celestial] [258, 526] NaN NaN \n",
+ "1 [celestial, opendoar] [669, 58] NaN NaN \n",
+ "10 [celestial, opendoar] [526, 258] NaN NaN \n",
"1000 NaN NaN NaN NaN \n",
- "10001 [opendoar, celestial] [3408, 5881] NaN NaN \n",
+ "10001 [celestial, opendoar] [5881, 3408] NaN NaN \n",
"10008 celestial 5882 NaN NaN \n",
"\n",
" submitted_to_done webometrics_rank webometrics_size \\\n",
@@ -3505,8 +3505,8 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
- " [opendoar, celestial, roarmap] | \n",
- " [166, 1106, 69] | \n",
+ " [roarmap, celestial, opendoar] | \n",
+ " [69, 166, 1106] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -3573,7 +3573,7 @@
"4259 NaN NaN NaN \n",
"\n",
" registry_name registry_id submit_to \\\n",
- "4259 [opendoar, celestial, roarmap] [166, 1106, 69] NaN \n",
+ "4259 [roarmap, celestial, opendoar] [69, 166, 1106] NaN \n",
"\n",
" submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n",
"4259 NaN NaN 1 6 \n",
@@ -3831,7 +3831,7 @@
" 31 | \n",
" 126 | \n",
" 53 | \n",
- " 938 | \n",
+ " 937 | \n",
" 4898 | \n",
" 210 | \n",
" 173 | \n",
@@ -3844,8 +3844,8 @@
" 118 | \n",
" 134 | \n",
" 117 | \n",
- " 9 | \n",
- " 4259 | \n",
+ " 7 | \n",
+ " 4260 | \n",
" 7 | \n",
" 1 | \n",
" 1 | \n",
@@ -3922,9 +3922,9 @@
" 0 | \n",
" 0 | \n",
" 0 | \n",
- " [opendoar, celestial] | \n",
+ " [celestial, opendoar] | \n",
" [1879, 2246] | \n",
- " [opendoar, celestial, roarmap] | \n",
+ " [roarmap, celestial, opendoar] | \n",
" opendoar | \n",
" 2021-01-25 | \n",
" 24 | \n",
@@ -4775,7 +4775,7 @@
"\n",
" software geoname version subjects date \\\n",
"count 4700 4730 5444 1289 5429 \n",
- "unique 31 126 53 938 4898 \n",
+ "unique 31 126 53 937 4898 \n",
"top dspace geoname_2_US other K1 2006-05-04 10:48:14 \n",
"freq 2341 845 4841 53 99 \n",
"mean NaN NaN NaN NaN NaN \n",
@@ -4840,8 +4840,8 @@
"\n",
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
"count 258 270 258 4605 \n",
- "unique 118 134 117 9 \n",
- "top 0 0 0 [opendoar, celestial] \n",
+ "unique 118 134 117 7 \n",
+ "top 0 0 0 [celestial, opendoar] \n",
"freq 114 113 114 2106 \n",
"mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n",
@@ -4853,8 +4853,8 @@
"\n",
" registry_id submit_to submitted_to_name \\\n",
"count 4580 375 205 \n",
- "unique 4259 7 1 \n",
- "top [1879, 2246] [opendoar, celestial, roarmap] opendoar \n",
+ "unique 4260 7 1 \n",
+ "top [1879, 2246] [roarmap, celestial, opendoar] opendoar \n",
"freq 4 119 205 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
diff --git a/notebooks/02-subjects&geographic.ipynb b/notebooks/02-subjects&geographic.ipynb
index b8c9746..6428f40 100644
--- a/notebooks/02-subjects&geographic.ipynb
+++ b/notebooks/02-subjects&geographic.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -29,7 +29,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -77,7 +77,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -560,7 +560,7 @@
"4 ADS is covered by Clarivate Data Citation Inde... 2012-07-23 2021-09-02 "
]
},
- "execution_count": 15,
+ "execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -582,7 +582,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 4,
"metadata": {
"scrolled": false
},
@@ -936,7 +936,7 @@
"freq 2235 17 20 104 "
]
},
- "execution_count": 16,
+ "execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@@ -954,7 +954,7 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -1207,7 +1207,7 @@
"4 42.0 "
]
},
- "execution_count": 17,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -1226,7 +1226,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -1689,7 +1689,7 @@
"max 4.200000e+08 "
]
},
- "execution_count": 18,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@@ -1707,7 +1707,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -1874,7 +1874,7 @@
" NaN | \n",
" NaN | \n",
" [opendoar, celestial] | \n",
- " [58, 669] | \n",
+ " [669, 58] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1952,7 +1952,7 @@
" NaN | \n",
" NaN | \n",
" [opendoar, celestial] | \n",
- " [526, 258] | \n",
+ " [258, 526] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -2085,7 +2085,7 @@
" TRUE | \n",
" TRUE | \n",
" TRUE | \n",
- " [Climate Service Center 2.0, KLIMZUG projects,... | \n",
+ " [Helmholtz-Zentrum Geesthacht, Climate Service... | \n",
" [http://www.climateservicecenter.de/, http://w... | \n",
" de | \n",
" Hamburg | \n",
@@ -2094,7 +2094,7 @@
" opus | \n",
" geoname_2_DE | \n",
" other | \n",
- " [GE, GF, HD, S1, G1] | \n",
+ " [GE, S1, GF, HD, G1] | \n",
" 2015-07-02 08:08:31 | \n",
" NaN | \n",
" NaN | \n",
@@ -2108,7 +2108,7 @@
" NaN | \n",
" NaN | \n",
" [opendoar, celestial] | \n",
- " [5881, 3408] | \n",
+ " [3408, 5881] | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -2299,7 +2299,7 @@
"0 NaN NaN \n",
"1 NaN NaN \n",
"2 NaN NaN \n",
- "3 TRUE [Climate Service Center 2.0, KLIMZUG projects,... \n",
+ "3 TRUE [Helmholtz-Zentrum Geesthacht, Climate Service... \n",
"4 FALSE Skidmore College \n",
"\n",
" organisation_home_page location_country \\\n",
@@ -2320,7 +2320,7 @@
"0 geoname_2_FR other NaN 2002-05-17 19:24:41 NaN \n",
"1 geoname_2_SE other NaN 2005-12-08 13:15:22 NaN \n",
"2 geoname_2_PT other NaN 2006-05-04 10:48:14 NaN \n",
- "3 geoname_2_DE other [GE, GF, HD, S1, G1] 2015-07-02 08:08:31 NaN \n",
+ "3 geoname_2_DE other [GE, S1, GF, HD, G1] 2015-07-02 08:08:31 NaN \n",
"4 geoname_2_US other NaN 2015-07-06 17:35:50 NaN \n",
"\n",
" suggestions activity_low activity_medium activity_high recordcount \\\n",
@@ -2345,10 +2345,10 @@
"4 NaN NaN NaN celestial \n",
"\n",
" registry_id submit_to submitted_to_name submitted_to_done \\\n",
- "0 [58, 669] NaN NaN NaN \n",
- "1 [526, 258] NaN NaN NaN \n",
+ "0 [669, 58] NaN NaN NaN \n",
+ "1 [258, 526] NaN NaN NaN \n",
"2 NaN NaN NaN NaN \n",
- "3 [5881, 3408] NaN NaN NaN \n",
+ "3 [3408, 5881] NaN NaN NaN \n",
"4 5882 NaN NaN NaN \n",
"\n",
" webometrics_rank webometrics_size webometrics_visibility \\\n",
@@ -2373,7 +2373,7 @@
"4 NaN "
]
},
- "execution_count": 19,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -2399,7 +2399,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -2644,7 +2644,7 @@
" 134 | \n",
" 117 | \n",
" 7 | \n",
- " 4259 | \n",
+ " 4261 | \n",
" 7 | \n",
" 1 | \n",
" 1 | \n",
@@ -2723,7 +2723,7 @@
" 0 | \n",
" [opendoar, celestial] | \n",
" 2479 | \n",
- " [roarmap, opendoar, celestial] | \n",
+ " [opendoar, roarmap, celestial] | \n",
" opendoar | \n",
" 2021-01-25 | \n",
" 24 | \n",
@@ -3652,8 +3652,8 @@
"\n",
" registry_id submit_to submitted_to_name \\\n",
"count 4580 375 205 \n",
- "unique 4259 7 1 \n",
- "top 2479 [roarmap, opendoar, celestial] opendoar \n",
+ "unique 4261 7 1 \n",
+ "top 2479 [opendoar, roarmap, celestial] opendoar \n",
"freq 4 119 205 \n",
"mean NaN NaN NaN \n",
"std NaN NaN NaN \n",
@@ -3716,7 +3716,7 @@
"max NaN "
]
},
- "execution_count": 20,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -3734,7 +3734,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -3769,9 +3769,11 @@
" attributes.metadata.homepage | \n",
" attributes.metadata.identifier | \n",
" attributes.metadata.description | \n",
+ " attributes.metadata.abbreviation | \n",
" attributes.metadata.support-links | \n",
" attributes.metadata.year-creation | \n",
" attributes.metadata.data-processes | \n",
+ " attributes.metadata.cross-references | \n",
" attributes.legacy-ids | \n",
" attributes.fairsharing-registry | \n",
" attributes.record-type | \n",
@@ -3788,48 +3790,78 @@
" attributes.description | \n",
" attributes.publications | \n",
" attributes.licence-links | \n",
+ " attributes.url-for-logo | \n",
" attributes.metadata.citations | \n",
- " attributes.metadata.abbreviation | \n",
- " attributes.metadata.access-points | \n",
" attributes.metadata.associated-tools | \n",
- " attributes.metadata.deprecation-date | \n",
" attributes.metadata.deprecation-reason | \n",
+ " attributes.metadata.data-access-condition.type | \n",
+ " attributes.metadata.data-contact-information | \n",
+ " attributes.metadata.data-deposition-condition.url | \n",
+ " attributes.metadata.data-deposition-condition.type | \n",
+ " attributes.metadata.deprecation-date | \n",
+ " attributes.metadata.access-points | \n",
+ " attributes.metadata.data-access-condition.url | \n",
+ " attributes.metadata.resource-sustainability.url | \n",
+ " attributes.metadata.resource-sustainability.name | \n",
+ " attributes.metadata.data-preservation-policy.url | \n",
+ " attributes.metadata.data-preservation-policy.name | \n",
+ " attributes.metadata.data-access-for-pre-publication-review | \n",
+ " attributes.metadata.data-versioning | \n",
+ " attributes.metadata.data-curation.type | \n",
+ " attributes.metadata.data-curation.url | \n",
+ " attributes.metadata.citation-to-related-publications | \n",
" attributes.metadata.tombstone | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
- " 1723 | \n",
+ " 3226 | \n",
" fairsharing-records | \n",
- " 2014-11-04T15:23:40.000Z | \n",
- " 2021-09-30T11:39:06.829Z | \n",
- " 10.25504/FAIRsharing.8t18te | \n",
- " Cell Image Library | \n",
+ " 2020-12-09T11:53:44.000Z | \n",
+ " 2022-02-08T10:42:36.452Z | \n",
+ " 10.25504/FAIRsharing.d6423b | \n",
+ " WDC Sunspot Index and Long-term Solar Observat... | \n",
" ready | \n",
- " [{'contact-name': 'David Orloff', 'contact-ema... | \n",
- " http://www.cellimagelibrary.org | \n",
- " 1723 | \n",
- " This library is a public and easily accessible... | \n",
- " [{'url': 'http://www.cellimagelibrary.org/page... | \n",
- " 2010.0 | \n",
- " [{'name': 'live update', 'type': 'data release... | \n",
- " [biodbcore-000180, bsg-d000180] | \n",
+ " [{'contact-name': 'Frédéric Clette', 'contact-... | \n",
+ " http://sidc.be/silso/home | \n",
+ " 3226 | \n",
+ " The WDC-SILSO is an activity of the Operationa... | \n",
+ " WDC-SILSO | \n",
+ " [{'url': 'http://www.sidc.be/silso/taxonomy/te... | \n",
+ " 2013.0 | \n",
+ " [{'url': 'http://www.sidc.be/silso/datafiles',... | \n",
+ " [{'url': 'https://www.re3data.org/repository/r... | \n",
+ " [biodbcore-001740, bsg-d001740] | \n",
" Database | \n",
" repository | \n",
- " [Cell Biology, Life Science] | \n",
- " [Cell, Microscopy, Light microscopy, Electron ... | \n",
- " [All] | \n",
- " [] | \n",
- " [United States] | \n",
- " FAIRsharing record for: Cell Image Library | \n",
- " None | \n",
- " https://fairsharing.org/10.25504/FAIRsharing.8... | \n",
- " 10.25504/FAIRsharing.8t18te | \n",
+ " [Electromagnetism, Astrophysics and Astronomy,... | \n",
+ " [Climate, Observation design] | \n",
+ " [Not applicable] | \n",
+ " [Climate change, earth observation, Electromag... | \n",
+ " [Belgium] | \n",
+ " FAIRsharing record for: WDC Sunspot Index and ... | \n",
+ " WDC-SILSO | \n",
+ " https://fairsharing.org/10.25504/FAIRsharing.d... | \n",
+ " 10.25504/FAIRsharing.d6423b | \n",
" https://creativecommons.org/licenses/by-sa/4.0... | \n",
- " This FAIRsharing record describes: This librar... | \n",
- " [{'id': 232, 'pubmed_id': 23203874, 'title': '... | \n",
- " [{'licence-name': 'Cell Image Library Data Pol... | \n",
+ " This FAIRsharing record describes: The WDC-SIL... | \n",
+ " [] | \n",
+ " [{'licence-name': 'SILSO legal notices', 'lice... | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -3840,36 +3872,52 @@
"
\n",
" \n",
" 1 | \n",
- " 3101 | \n",
+ " 2114 | \n",
" fairsharing-records | \n",
- " 2020-09-16T08:49:13.000Z | \n",
- " 2021-09-30T11:36:45.452Z | \n",
- " NaN | \n",
- " WHOI Ship Data-Grabber System | \n",
+ " 2014-11-04T15:23:40.000Z | \n",
+ " 2022-01-21T14:39:02.195Z | \n",
+ " 10.25504/FAIRsharing.p06nme | \n",
+ " Biological Magnetic Resonance Data Bank | \n",
" ready | \n",
- " NaN | \n",
- " http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html | \n",
- " 3101 | \n",
- " The WHOI Ship DataGrabber system provides the ... | \n",
- " [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... | \n",
- " 2004.0 | \n",
- " [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... | \n",
- " [biodbcore-001609, bsg-d001609] | \n",
+ " [{'contact-name': 'Helpdesk', 'contact-email':... | \n",
+ " https://bmrb.io/ | \n",
+ " 2114 | \n",
+ " BMRB collects, annotates, archives, and dissem... | \n",
+ " BMRB | \n",
+ " [{'url': 'https://bmrb.io/bmrb/news/', 'name':... | \n",
+ " 1988.0 | \n",
+ " [{'url': 'https://bmrb.io/data_library/rsync.s... | \n",
+ " [{'url': 'https://www.re3data.org/repository/r... | \n",
+ " [biodbcore-000584, bsg-d000584] | \n",
" Database | \n",
" repository | \n",
- " [Earth Science, Water Research, Oceanography] | \n",
+ " [Structural Biology] | \n",
+ " [Molecular structure, Protein structure, Pepti... | \n",
+ " [All] | \n",
" [] | \n",
- " [Not applicable] | \n",
- " [subseafloor environments] | \n",
" [United States] | \n",
- " FAIRsharing record for: WHOI Ship Data-Grabber... | \n",
- " None | \n",
- " https://fairsharing.org/fairsharing_records/3101 | \n",
- " None | \n",
+ " FAIRsharing record for: Biological Magnetic Re... | \n",
+ " BMRB | \n",
+ " https://fairsharing.org/10.25504/FAIRsharing.p... | \n",
+ " 10.25504/FAIRsharing.p06nme | \n",
" https://creativecommons.org/licenses/by-sa/4.0... | \n",
- " This FAIRsharing record describes: The WHOI Sh... | \n",
- " [] | \n",
- " [{'licence-name': 'NDSF Data Archive Policy', ... | \n",
+ " This FAIRsharing record describes: BMRB collec... | \n",
+ " [{'id': 552, 'pubmed_id': 18288446, 'title': '... | \n",
+ " [{'licence-name': 'wwPDB Privacy and Usage Pol... | \n",
+ " None | \n",
+ " [{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17... | \n",
+ " [{'url': 'https://bmrb.io/validate/', 'name': ... | \n",
+ " | \n",
+ " open | \n",
+ " yes | \n",
+ " https://bmrb.io/deposit/ | \n",
+ " open | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -3880,78 +3928,110 @@
"
\n",
" \n",
" 2 | \n",
- " 2649 | \n",
+ " 3022 | \n",
" fairsharing-records | \n",
- " 2018-08-07T20:23:32.000Z | \n",
- " 2021-09-30T11:39:07.898Z | \n",
- " NaN | \n",
- " Electron Microscope Public Image Archive | \n",
+ " 2020-06-17T10:25:30.000Z | \n",
+ " 2022-02-08T10:41:04.073Z | \n",
+ " 10.25504/FAIRsharing.8b7a2f | \n",
+ " Fisheries and Oceans Canada Pacific Region Dat... | \n",
" ready | \n",
- " [{'contact-name': 'General contact', 'contact-... | \n",
- " https://www.ebi.ac.uk/pdbe/emdb/empiar/ | \n",
- " 2649 | \n",
- " EMPIAR, the Electron Microscopy Public Image A... | \n",
- " [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... | \n",
- " 2015.0 | \n",
- " [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... | \n",
- " [biodbcore-001140, bsg-d001140] | \n",
+ " [{'contact-name': 'Peter Chandler', 'contact-e... | \n",
+ " http://www.pac.dfo-mpo.gc.ca/science/oceans/da... | \n",
+ " 3022 | \n",
+ " The Institute of Ocean Sciences (IOS)/Ocean Sc... | \n",
+ " None | \n",
+ " [{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P... | \n",
+ " NaN | \n",
+ " [{'name': 'Users must contact the Senior Analy... | \n",
+ " [{'url': 'https://www.re3data.org/repository/r... | \n",
+ " [biodbcore-001530, bsg-d001530] | \n",
" Database | \n",
" repository | \n",
- " [Bioinformatics, Biology] | \n",
- " [Protein image, Microscopy, Electron microscop... | \n",
- " [All] | \n",
- " [] | \n",
- " [Greece, Czech Republic, United Kingdom, Icela... | \n",
- " FAIRsharing record for: Electron Microscope Pu... | \n",
- " EMPIAR | \n",
- " https://fairsharing.org/fairsharing_records/2649 | \n",
+ " [Environmental Science, Meteorology, Earth Sci... | \n",
+ " [Climate] | \n",
+ " [Not applicable] | \n",
+ " [Salinity, Temperature] | \n",
+ " [Canada] | \n",
+ " FAIRsharing record for: Fisheries and Oceans C... | \n",
" None | \n",
+ " https://fairsharing.org/10.25504/FAIRsharing.8... | \n",
+ " 10.25504/FAIRsharing.8b7a2f | \n",
" https://creativecommons.org/licenses/by-sa/4.0... | \n",
- " This FAIRsharing record describes: EMPIAR, the... | \n",
- " [{'id': 2232, 'pubmed_id': 27067018, 'title': ... | \n",
- " [{'licence-name': 'EMBL-EBI Terms of Use', 'li... | \n",
- " [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... | \n",
- " EMPIAR | \n",
- " [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... | \n",
- " [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... | \n",
+ " This FAIRsharing record describes: The Institu... | \n",
+ " [] | \n",
+ " [{'licence-name': 'Fisheries and Oceans Canada... | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
- " 2657 | \n",
+ " 2998 | \n",
" fairsharing-records | \n",
- " 2018-08-13T15:12:11.000Z | \n",
- " 2021-09-30T11:37:28.736Z | \n",
- " 10.25504/FAIRsharing.tnByoG | \n",
- " ClinicalStudyDataRequest.com | \n",
+ " 2020-05-21T07:42:30.000Z | \n",
+ " 2022-02-08T10:40:19.531Z | \n",
+ " 10.25504/FAIRsharing.e08886 | \n",
+ " Climate Prediction Center | \n",
" ready | \n",
- " [{'contact-email': 'support@clinicalstudydatar... | \n",
- " https://clinicalstudydatarequest.com/ | \n",
- " 2657 | \n",
- " ClinicalStudyDataRequest.com (CSDR) is a conso... | \n",
- " [{'url': 'https://clinicalstudydatarequest.com... | \n",
- " 2014.0 | \n",
- " [{'url': 'https://clinicalstudydatarequest.com... | \n",
- " [biodbcore-001149, bsg-d001149] | \n",
+ " [{'contact-name': 'Jon Hoopingarner', 'contact... | \n",
+ " https://www.cpc.ncep.noaa.gov/ | \n",
+ " 2998 | \n",
+ " The Climate Prediction Center (CPC) produces o... | \n",
+ " CPC | \n",
+ " [{'url': 'https://www.cpc.ncep.noaa.gov/commen... | \n",
+ " 1970.0 | \n",
+ " [{'url': 'https://www.cpc.ncep.noaa.gov/', 'na... | \n",
+ " [{'url': 'https://www.re3data.org/repository/r... | \n",
+ " [biodbcore-001504, bsg-d001504] | \n",
" Database | \n",
" repository | \n",
- " [Preclinical Studies, Biomedical Science] | \n",
- " [] | \n",
- " [Homo sapiens] | \n",
- " [] | \n",
- " [Worldwide] | \n",
- " FAIRsharing record for: ClinicalStudyDataReque... | \n",
- " CSDR | \n",
- " https://fairsharing.org/10.25504/FAIRsharing.t... | \n",
- " 10.25504/FAIRsharing.tnByoG | \n",
+ " [Hydrogeology, Geography, Meteorology, Geodesy... | \n",
+ " [Climate] | \n",
+ " [Not applicable] | \n",
+ " [Forecasting, weather] | \n",
+ " [United States] | \n",
+ " FAIRsharing record for: Climate Prediction Center | \n",
+ " CPC | \n",
+ " https://fairsharing.org/10.25504/FAIRsharing.e... | \n",
+ " 10.25504/FAIRsharing.e08886 | \n",
" https://creativecommons.org/licenses/by-sa/4.0... | \n",
- " This FAIRsharing record describes: ClinicalStu... | \n",
+ " This FAIRsharing record describes: The Climate... | \n",
" [] | \n",
- " [{'licence-name': 'CSDR Data Sharing Agreement... | \n",
+ " [{'licence-name': 'National Weather Service Di... | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" NaN | \n",
- " CSDR | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -3960,38 +4040,54 @@
"
\n",
" \n",
" 4 | \n",
- " 2078 | \n",
+ " 2301 | \n",
" fairsharing-records | \n",
- " 2014-11-04T15:23:40.000Z | \n",
- " 2021-09-30T11:34:43.129Z | \n",
- " 10.25504/FAIRsharing.3axym7 | \n",
- " Germplasm Resources Information Network | \n",
- " ready | \n",
- " [{'contact-email': 'dbmu@ars-grin.gov'}] | \n",
- " https://www.ars-grin.gov/ | \n",
- " 2078 | \n",
- " GRIN provides National Genetic Resources Progr... | \n",
- " [{'url': 'https://www.ars-grin.gov/Pages/Colle... | \n",
- " 2010.0 | \n",
- " [{'url': 'https://www.ars-grin.gov/', 'name': ... | \n",
- " [biodbcore-000546, bsg-d000546] | \n",
+ " 2016-06-03T14:54:08.000Z | \n",
+ " 2021-11-24T13:17:51.201Z | \n",
+ " 10.25504/FAIRsharing.meh9wz | \n",
+ " Acytostelium Gene Database | \n",
+ " deprecated | \n",
+ " [{'contact-name': 'Acytostelium genome consort... | \n",
+ " http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b... | \n",
+ " 2301 | \n",
+ " Genome and transcriptome database of Acytostel... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2008.0 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " [biodbcore-000775, bsg-d000775] | \n",
" Database | \n",
" repository | \n",
- " [Life Science] | \n",
- " [Cell, Cell culture, Germplasm] | \n",
- " [Bacteria, Metazoa, Viridiplantae] | \n",
+ " [Genomics, Life Science, Transcriptomics] | \n",
+ " [DNA sequence data, Gene model annotation] | \n",
+ " [Acytostelium subglobosum] | \n",
" [] | \n",
- " [United States] | \n",
- " FAIRsharing record for: Germplasm Resources In... | \n",
- " GRIN | \n",
- " https://fairsharing.org/10.25504/FAIRsharing.3... | \n",
- " 10.25504/FAIRsharing.3axym7 | \n",
+ " [United Kingdom, Japan] | \n",
+ " FAIRsharing record for: Acytostelium Gene Data... | \n",
+ " None | \n",
+ " https://fairsharing.org/10.25504/FAIRsharing.m... | \n",
+ " 10.25504/FAIRsharing.meh9wz | \n",
" https://creativecommons.org/licenses/by-sa/4.0... | \n",
- " This FAIRsharing record describes: GRIN provid... | \n",
- " [] | \n",
+ " This FAIRsharing record describes: Genome and ... | \n",
+ " [{'id': 1139, 'pubmed_id': 25758444, 'title': ... | \n",
" [] | \n",
+ " None | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " This resource is no longer available at the st... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 2021-9-17 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" NaN | \n",
- " GRIN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -4004,130 +4100,151 @@
],
"text/plain": [
" id type attributes.created-at \\\n",
- "0 1723 fairsharing-records 2014-11-04T15:23:40.000Z \n",
- "1 3101 fairsharing-records 2020-09-16T08:49:13.000Z \n",
- "2 2649 fairsharing-records 2018-08-07T20:23:32.000Z \n",
- "3 2657 fairsharing-records 2018-08-13T15:12:11.000Z \n",
- "4 2078 fairsharing-records 2014-11-04T15:23:40.000Z \n",
+ "0 3226 fairsharing-records 2020-12-09T11:53:44.000Z \n",
+ "1 2114 fairsharing-records 2014-11-04T15:23:40.000Z \n",
+ "2 3022 fairsharing-records 2020-06-17T10:25:30.000Z \n",
+ "3 2998 fairsharing-records 2020-05-21T07:42:30.000Z \n",
+ "4 2301 fairsharing-records 2016-06-03T14:54:08.000Z \n",
"\n",
" attributes.updated-at attributes.metadata.doi \\\n",
- "0 2021-09-30T11:39:06.829Z 10.25504/FAIRsharing.8t18te \n",
- "1 2021-09-30T11:36:45.452Z NaN \n",
- "2 2021-09-30T11:39:07.898Z NaN \n",
- "3 2021-09-30T11:37:28.736Z 10.25504/FAIRsharing.tnByoG \n",
- "4 2021-09-30T11:34:43.129Z 10.25504/FAIRsharing.3axym7 \n",
+ "0 2022-02-08T10:42:36.452Z 10.25504/FAIRsharing.d6423b \n",
+ "1 2022-01-21T14:39:02.195Z 10.25504/FAIRsharing.p06nme \n",
+ "2 2022-02-08T10:41:04.073Z 10.25504/FAIRsharing.8b7a2f \n",
+ "3 2022-02-08T10:40:19.531Z 10.25504/FAIRsharing.e08886 \n",
+ "4 2021-11-24T13:17:51.201Z 10.25504/FAIRsharing.meh9wz \n",
"\n",
- " attributes.metadata.name attributes.metadata.status \\\n",
- "0 Cell Image Library ready \n",
- "1 WHOI Ship Data-Grabber System ready \n",
- "2 Electron Microscope Public Image Archive ready \n",
- "3 ClinicalStudyDataRequest.com ready \n",
- "4 Germplasm Resources Information Network ready \n",
+ " attributes.metadata.name \\\n",
+ "0 WDC Sunspot Index and Long-term Solar Observat... \n",
+ "1 Biological Magnetic Resonance Data Bank \n",
+ "2 Fisheries and Oceans Canada Pacific Region Dat... \n",
+ "3 Climate Prediction Center \n",
+ "4 Acytostelium Gene Database \n",
+ "\n",
+ " attributes.metadata.status \\\n",
+ "0 ready \n",
+ "1 ready \n",
+ "2 ready \n",
+ "3 ready \n",
+ "4 deprecated \n",
"\n",
" attributes.metadata.contacts \\\n",
- "0 [{'contact-name': 'David Orloff', 'contact-ema... \n",
- "1 NaN \n",
- "2 [{'contact-name': 'General contact', 'contact-... \n",
- "3 [{'contact-email': 'support@clinicalstudydatar... \n",
- "4 [{'contact-email': 'dbmu@ars-grin.gov'}] \n",
+ "0 [{'contact-name': 'Frédéric Clette', 'contact-... \n",
+ "1 [{'contact-name': 'Helpdesk', 'contact-email':... \n",
+ "2 [{'contact-name': 'Peter Chandler', 'contact-e... \n",
+ "3 [{'contact-name': 'Jon Hoopingarner', 'contact... \n",
+ "4 [{'contact-name': 'Acytostelium genome consort... \n",
"\n",
- " attributes.metadata.homepage \\\n",
- "0 http://www.cellimagelibrary.org \n",
- "1 http://4dgeo.whoi.edu/shipdata/SDG_shipdata.html \n",
- "2 https://www.ebi.ac.uk/pdbe/emdb/empiar/ \n",
- "3 https://clinicalstudydatarequest.com/ \n",
- "4 https://www.ars-grin.gov/ \n",
+ " attributes.metadata.homepage \\\n",
+ "0 http://sidc.be/silso/home \n",
+ "1 https://bmrb.io/ \n",
+ "2 http://www.pac.dfo-mpo.gc.ca/science/oceans/da... \n",
+ "3 https://www.cpc.ncep.noaa.gov/ \n",
+ "4 http://cosmos.bot.kyoto-u.ac.jp/acytodb//cgi-b... \n",
"\n",
" attributes.metadata.identifier \\\n",
- "0 1723 \n",
- "1 3101 \n",
- "2 2649 \n",
- "3 2657 \n",
- "4 2078 \n",
+ "0 3226 \n",
+ "1 2114 \n",
+ "2 3022 \n",
+ "3 2998 \n",
+ "4 2301 \n",
"\n",
" attributes.metadata.description \\\n",
- "0 This library is a public and easily accessible... \n",
- "1 The WHOI Ship DataGrabber system provides the ... \n",
- "2 EMPIAR, the Electron Microscopy Public Image A... \n",
- "3 ClinicalStudyDataRequest.com (CSDR) is a conso... \n",
- "4 GRIN provides National Genetic Resources Progr... \n",
+ "0 The WDC-SILSO is an activity of the Operationa... \n",
+ "1 BMRB collects, annotates, archives, and dissem... \n",
+ "2 The Institute of Ocean Sciences (IOS)/Ocean Sc... \n",
+ "3 The Climate Prediction Center (CPC) produces o... \n",
+ "4 Genome and transcriptome database of Acytostel... \n",
+ "\n",
+ " attributes.metadata.abbreviation \\\n",
+ "0 WDC-SILSO \n",
+ "1 BMRB \n",
+ "2 None \n",
+ "3 CPC \n",
+ "4 NaN \n",
"\n",
" attributes.metadata.support-links \\\n",
- "0 [{'url': 'http://www.cellimagelibrary.org/page... \n",
- "1 [{'url': 'http://4dgeo.whoi.edu/shipdata/SDG_o... \n",
- "2 [{'url': 'https://www.ebi.ac.uk/support/EMPIAR... \n",
- "3 [{'url': 'https://clinicalstudydatarequest.com... \n",
- "4 [{'url': 'https://www.ars-grin.gov/Pages/Colle... \n",
+ "0 [{'url': 'http://www.sidc.be/silso/taxonomy/te... \n",
+ "1 [{'url': 'https://bmrb.io/bmrb/news/', 'name':... \n",
+ "2 [{'url': 'DFO.PAC.SCI.IOSData-DonneesISO.SCI.P... \n",
+ "3 [{'url': 'https://www.cpc.ncep.noaa.gov/commen... \n",
+ "4 NaN \n",
"\n",
" attributes.metadata.year-creation \\\n",
- "0 2010.0 \n",
- "1 2004.0 \n",
- "2 2015.0 \n",
- "3 2014.0 \n",
- "4 2010.0 \n",
+ "0 2013.0 \n",
+ "1 1988.0 \n",
+ "2 NaN \n",
+ "3 1970.0 \n",
+ "4 2008.0 \n",
"\n",
" attributes.metadata.data-processes \\\n",
- "0 [{'name': 'live update', 'type': 'data release... \n",
- "1 [{'url': 'http://4dgeo.whoi.edu/sdg-bin/dv_mai... \n",
- "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
- "3 [{'url': 'https://clinicalstudydatarequest.com... \n",
- "4 [{'url': 'https://www.ars-grin.gov/', 'name': ... \n",
+ "0 [{'url': 'http://www.sidc.be/silso/datafiles',... \n",
+ "1 [{'url': 'https://bmrb.io/data_library/rsync.s... \n",
+ "2 [{'name': 'Users must contact the Senior Analy... \n",
+ "3 [{'url': 'https://www.cpc.ncep.noaa.gov/', 'na... \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.cross-references \\\n",
+ "0 [{'url': 'https://www.re3data.org/repository/r... \n",
+ "1 [{'url': 'https://www.re3data.org/repository/r... \n",
+ "2 [{'url': 'https://www.re3data.org/repository/r... \n",
+ "3 [{'url': 'https://www.re3data.org/repository/r... \n",
+ "4 NaN \n",
"\n",
" attributes.legacy-ids attributes.fairsharing-registry \\\n",
- "0 [biodbcore-000180, bsg-d000180] Database \n",
- "1 [biodbcore-001609, bsg-d001609] Database \n",
- "2 [biodbcore-001140, bsg-d001140] Database \n",
- "3 [biodbcore-001149, bsg-d001149] Database \n",
- "4 [biodbcore-000546, bsg-d000546] Database \n",
+ "0 [biodbcore-001740, bsg-d001740] Database \n",
+ "1 [biodbcore-000584, bsg-d000584] Database \n",
+ "2 [biodbcore-001530, bsg-d001530] Database \n",
+ "3 [biodbcore-001504, bsg-d001504] Database \n",
+ "4 [biodbcore-000775, bsg-d000775] Database \n",
"\n",
- " attributes.record-type attributes.subjects \\\n",
- "0 repository [Cell Biology, Life Science] \n",
- "1 repository [Earth Science, Water Research, Oceanography] \n",
- "2 repository [Bioinformatics, Biology] \n",
- "3 repository [Preclinical Studies, Biomedical Science] \n",
- "4 repository [Life Science] \n",
+ " attributes.record-type attributes.subjects \\\n",
+ "0 repository [Electromagnetism, Astrophysics and Astronomy,... \n",
+ "1 repository [Structural Biology] \n",
+ "2 repository [Environmental Science, Meteorology, Earth Sci... \n",
+ "3 repository [Hydrogeology, Geography, Meteorology, Geodesy... \n",
+ "4 repository [Genomics, Life Science, Transcriptomics] \n",
"\n",
" attributes.domains \\\n",
- "0 [Cell, Microscopy, Light microscopy, Electron ... \n",
- "1 [] \n",
- "2 [Protein image, Microscopy, Electron microscop... \n",
- "3 [] \n",
- "4 [Cell, Cell culture, Germplasm] \n",
+ "0 [Climate, Observation design] \n",
+ "1 [Molecular structure, Protein structure, Pepti... \n",
+ "2 [Climate] \n",
+ "3 [Climate] \n",
+ "4 [DNA sequence data, Gene model annotation] \n",
"\n",
- " attributes.taxonomies attributes.user-defined-tags \\\n",
- "0 [All] [] \n",
- "1 [Not applicable] [subseafloor environments] \n",
- "2 [All] [] \n",
- "3 [Homo sapiens] [] \n",
- "4 [Bacteria, Metazoa, Viridiplantae] [] \n",
+ " attributes.taxonomies \\\n",
+ "0 [Not applicable] \n",
+ "1 [All] \n",
+ "2 [Not applicable] \n",
+ "3 [Not applicable] \n",
+ "4 [Acytostelium subglobosum] \n",
"\n",
- " attributes.countries \\\n",
- "0 [United States] \n",
- "1 [United States] \n",
- "2 [Greece, Czech Republic, United Kingdom, Icela... \n",
- "3 [Worldwide] \n",
- "4 [United States] \n",
+ " attributes.user-defined-tags attributes.countries \\\n",
+ "0 [Climate change, earth observation, Electromag... [Belgium] \n",
+ "1 [] [United States] \n",
+ "2 [Salinity, Temperature] [Canada] \n",
+ "3 [Forecasting, weather] [United States] \n",
+ "4 [] [United Kingdom, Japan] \n",
"\n",
" attributes.name attributes.abbreviation \\\n",
- "0 FAIRsharing record for: Cell Image Library None \n",
- "1 FAIRsharing record for: WHOI Ship Data-Grabber... None \n",
- "2 FAIRsharing record for: Electron Microscope Pu... EMPIAR \n",
- "3 FAIRsharing record for: ClinicalStudyDataReque... CSDR \n",
- "4 FAIRsharing record for: Germplasm Resources In... GRIN \n",
+ "0 FAIRsharing record for: WDC Sunspot Index and ... WDC-SILSO \n",
+ "1 FAIRsharing record for: Biological Magnetic Re... BMRB \n",
+ "2 FAIRsharing record for: Fisheries and Oceans C... None \n",
+ "3 FAIRsharing record for: Climate Prediction Center CPC \n",
+ "4 FAIRsharing record for: Acytostelium Gene Data... None \n",
"\n",
" attributes.url \\\n",
- "0 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
- "1 https://fairsharing.org/fairsharing_records/3101 \n",
- "2 https://fairsharing.org/fairsharing_records/2649 \n",
- "3 https://fairsharing.org/10.25504/FAIRsharing.t... \n",
- "4 https://fairsharing.org/10.25504/FAIRsharing.3... \n",
+ "0 https://fairsharing.org/10.25504/FAIRsharing.d... \n",
+ "1 https://fairsharing.org/10.25504/FAIRsharing.p... \n",
+ "2 https://fairsharing.org/10.25504/FAIRsharing.8... \n",
+ "3 https://fairsharing.org/10.25504/FAIRsharing.e... \n",
+ "4 https://fairsharing.org/10.25504/FAIRsharing.m... \n",
"\n",
" attributes.doi \\\n",
- "0 10.25504/FAIRsharing.8t18te \n",
- "1 None \n",
- "2 None \n",
- "3 10.25504/FAIRsharing.tnByoG \n",
- "4 10.25504/FAIRsharing.3axym7 \n",
+ "0 10.25504/FAIRsharing.d6423b \n",
+ "1 10.25504/FAIRsharing.p06nme \n",
+ "2 10.25504/FAIRsharing.8b7a2f \n",
+ "3 10.25504/FAIRsharing.e08886 \n",
+ "4 10.25504/FAIRsharing.meh9wz \n",
"\n",
" attributes.fairsharing-licence \\\n",
"0 https://creativecommons.org/licenses/by-sa/4.0... \n",
@@ -4137,60 +4254,144 @@
"4 https://creativecommons.org/licenses/by-sa/4.0... \n",
"\n",
" attributes.description \\\n",
- "0 This FAIRsharing record describes: This librar... \n",
- "1 This FAIRsharing record describes: The WHOI Sh... \n",
- "2 This FAIRsharing record describes: EMPIAR, the... \n",
- "3 This FAIRsharing record describes: ClinicalStu... \n",
- "4 This FAIRsharing record describes: GRIN provid... \n",
+ "0 This FAIRsharing record describes: The WDC-SIL... \n",
+ "1 This FAIRsharing record describes: BMRB collec... \n",
+ "2 This FAIRsharing record describes: The Institu... \n",
+ "3 This FAIRsharing record describes: The Climate... \n",
+ "4 This FAIRsharing record describes: Genome and ... \n",
"\n",
" attributes.publications \\\n",
- "0 [{'id': 232, 'pubmed_id': 23203874, 'title': '... \n",
- "1 [] \n",
- "2 [{'id': 2232, 'pubmed_id': 27067018, 'title': ... \n",
+ "0 [] \n",
+ "1 [{'id': 552, 'pubmed_id': 18288446, 'title': '... \n",
+ "2 [] \n",
"3 [] \n",
- "4 [] \n",
+ "4 [{'id': 1139, 'pubmed_id': 25758444, 'title': ... \n",
"\n",
- " attributes.licence-links \\\n",
- "0 [{'licence-name': 'Cell Image Library Data Pol... \n",
- "1 [{'licence-name': 'NDSF Data Archive Policy', ... \n",
- "2 [{'licence-name': 'EMBL-EBI Terms of Use', 'li... \n",
- "3 [{'licence-name': 'CSDR Data Sharing Agreement... \n",
- "4 [] \n",
+ " attributes.licence-links attributes.url-for-logo \\\n",
+ "0 [{'licence-name': 'SILSO legal notices', 'lice... None \n",
+ "1 [{'licence-name': 'wwPDB Privacy and Usage Pol... None \n",
+ "2 [{'licence-name': 'Fisheries and Oceans Canada... None \n",
+ "3 [{'licence-name': 'National Weather Service Di... None \n",
+ "4 [] None \n",
"\n",
" attributes.metadata.citations \\\n",
"0 NaN \n",
- "1 NaN \n",
- "2 [{'doi': '10.1038/nmeth.3806', 'pubmed-id': 27... \n",
- "3 NaN \n",
- "4 NaN \n",
- "\n",
- " attributes.metadata.abbreviation \\\n",
- "0 NaN \n",
- "1 NaN \n",
- "2 EMPIAR \n",
- "3 CSDR \n",
- "4 GRIN \n",
- "\n",
- " attributes.metadata.access-points \\\n",
- "0 NaN \n",
- "1 NaN \n",
- "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
+ "1 [{'doi': '10.1093/nar/gkm957', 'pubmed-id': 17... \n",
+ "2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
" attributes.metadata.associated-tools \\\n",
"0 NaN \n",
- "1 NaN \n",
- "2 [{'url': 'https://www.ebi.ac.uk/pdbe/emdb/empi... \n",
+ "1 [{'url': 'https://bmrb.io/validate/', 'name': ... \n",
+ "2 NaN \n",
"3 NaN \n",
"4 NaN \n",
"\n",
- " attributes.metadata.deprecation-date attributes.metadata.deprecation-reason \\\n",
- "0 NaN NaN \n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
+ " attributes.metadata.deprecation-reason \\\n",
+ "0 NaN \n",
+ "1 \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 This resource is no longer available at the st... \n",
+ "\n",
+ " attributes.metadata.data-access-condition.type \\\n",
+ "0 NaN \n",
+ "1 open \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-contact-information \\\n",
+ "0 NaN \n",
+ "1 yes \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-deposition-condition.url \\\n",
+ "0 NaN \n",
+ "1 https://bmrb.io/deposit/ \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-deposition-condition.type \\\n",
+ "0 NaN \n",
+ "1 open \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.deprecation-date attributes.metadata.access-points \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 2021-9-17 NaN \n",
+ "\n",
+ " attributes.metadata.data-access-condition.url \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.resource-sustainability.url \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.resource-sustainability.name \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-preservation-policy.url \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-preservation-policy.name \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-access-for-pre-publication-review \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.data-versioning attributes.metadata.data-curation.type \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " attributes.metadata.data-curation.url \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " attributes.metadata.citation-to-related-publications \\\n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
"\n",
" attributes.metadata.tombstone \n",
"0 NaN \n",
@@ -4200,13 +4401,13 @@
"4 NaN "
]
},
- "execution_count": 21,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "with open('../data/raw/fairsharing_dump_api_09_2021.json') as f:\n",
+ "with open('../data/raw/fairsharing_dump_api_02_2022.json') as f:\n",
" lines = f.read().splitlines()\n",
" \n",
"fairsharing_df = pd.DataFrame(lines)\n",
@@ -4219,7 +4420,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -4254,9 +4455,11 @@
" attributes.metadata.homepage | \n",
" attributes.metadata.identifier | \n",
" attributes.metadata.description | \n",
+ " attributes.metadata.abbreviation | \n",
" attributes.metadata.support-links | \n",
" attributes.metadata.year-creation | \n",
" attributes.metadata.data-processes | \n",
+ " attributes.metadata.cross-references | \n",
" attributes.legacy-ids | \n",
" attributes.fairsharing-registry | \n",
" attributes.record-type | \n",
@@ -4273,113 +4476,161 @@
" attributes.description | \n",
" attributes.publications | \n",
" attributes.licence-links | \n",
+ " attributes.url-for-logo | \n",
" attributes.metadata.citations | \n",
- " attributes.metadata.abbreviation | \n",
- " attributes.metadata.access-points | \n",
" attributes.metadata.associated-tools | \n",
- " attributes.metadata.deprecation-date | \n",
" attributes.metadata.deprecation-reason | \n",
+ " attributes.metadata.data-access-condition.type | \n",
+ " attributes.metadata.data-contact-information | \n",
+ " attributes.metadata.data-deposition-condition.url | \n",
+ " attributes.metadata.data-deposition-condition.type | \n",
+ " attributes.metadata.deprecation-date | \n",
+ " attributes.metadata.access-points | \n",
+ " attributes.metadata.data-access-condition.url | \n",
+ " attributes.metadata.resource-sustainability.url | \n",
+ " attributes.metadata.resource-sustainability.name | \n",
+ " attributes.metadata.data-preservation-policy.url | \n",
+ " attributes.metadata.data-preservation-policy.name | \n",
+ " attributes.metadata.data-access-for-pre-publication-review | \n",
+ " attributes.metadata.data-versioning | \n",
+ " attributes.metadata.data-curation.type | \n",
+ " attributes.metadata.data-curation.url | \n",
+ " attributes.metadata.citation-to-related-publications | \n",
" attributes.metadata.tombstone | \n",
"
\n",
" \n",
"