starting to analyse overlap

This commit is contained in:
Andrea Mannocci 2021-07-26 11:15:14 +02:00
parent dd6b79e69f
commit 7ab83cbb10
7 changed files with 2544 additions and 8584 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@ -24,16 +24,10 @@
"import ast\n", "import ast\n",
"import csv\n", "import csv\n",
"import json\n", "import json\n",
"import reverse_geocoder as rg\n",
"\n", "\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
"import pycountry_convert\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n", "import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n", "from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n", "import plotly.graph_objs as go\n",

View File

@ -9,16 +9,10 @@
"import ast\n", "import ast\n",
"import csv\n", "import csv\n",
"import json\n", "import json\n",
"import reverse_geocoder as rg\n",
"\n", "\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
"import pycountry_convert\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n", "import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n", "from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n", "import plotly.graph_objs as go\n",

View File

@ -9,16 +9,10 @@
"import ast\n", "import ast\n",
"import csv\n", "import csv\n",
"import json\n", "import json\n",
"import reverse_geocoder as rg\n",
"\n", "\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
"import pycountry_convert\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n", "import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n", "from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n", "import plotly.graph_objs as go\n",
@ -999,12 +993,12 @@
" <td>1</td>\n", " <td>1</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>disk0/00/00/12/03</td>\n", " <td>disk0/00/00/14/07</td>\n",
" <td>2010-01-06 13:43:48</td>\n", " <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-06 08:24:53</td>\n", " <td>2011-07-06 08:24:53</td>\n",
" <td>2010-01-06 13:43:48</td>\n", " <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n", " <td>institutional</td>\n",
" <td>10164</td>\n", " <td>12637</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>show</td>\n", " <td>show</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -1026,9 +1020,9 @@
" <td>http://eprints.upnjatim.ac.id/</td>\n", " <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n", " <td>Repositorio Institucional</td>\n",
" <td>http://virtuelcampus.univ-msila.dz/fll</td>\n", " <td>http://virtuelcampus.univ-msila.dz/fll</td>\n",
" <td>http://repositorio.autonoma.edu.co/jspui/</td>\n", " <td>http://npl.csircentral.net/</td>\n",
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n", " <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
" <td>http://cdigital.uv.mx/feed/rss_1.0/site</td>\n", " <td>https://twitter.com/rpsicomdp?lang=es</td>\n",
" <td>info:other:archives.eprints.org:import</td>\n", " <td>info:other:archives.eprints.org:import</td>\n",
" <td>TRUE</td>\n", " <td>TRUE</td>\n",
" <td>TRUE</td>\n", " <td>TRUE</td>\n",
@ -1044,7 +1038,7 @@
" <td>other</td>\n", " <td>other</td>\n",
" <td>L1</td>\n", " <td>L1</td>\n",
" <td>2006-05-04 10:48:14</td>\n", " <td>2006-05-04 10:48:14</td>\n",
" <td>CSIR-The National Physical Laboratory(NPL) is ...</td>\n", " <td>¿Quién puede depositar documentos en el reposi...</td>\n",
" <td>This repository is hosted by the Texas Digital...</td>\n", " <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
@ -1060,9 +1054,9 @@
" <td>celestial</td>\n", " <td>celestial</td>\n",
" <td>opendoar</td>\n", " <td>opendoar</td>\n",
" <td>2021-01-25</td>\n", " <td>2021-01-25</td>\n",
" <td>16</td>\n", " <td>367</td>\n",
" <td>65</td>\n", " <td>738</td>\n",
" <td>211</td>\n", " <td>362</td>\n",
" <td>824</td>\n", " <td>824</td>\n",
" <td>806</td>\n", " <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n", " <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
@ -1161,13 +1155,13 @@
" dir datestamp lastmod \\\n", " dir datestamp lastmod \\\n",
"count 5375 5375 5375 \n", "count 5375 5375 5375 \n",
"unique 5375 4127 3966 \n", "unique 5375 4127 3966 \n",
"top disk0/00/00/12/03 2010-01-06 13:43:48 2011-07-06 08:24:53 \n", "top disk0/00/00/14/07 2010-01-06 13:43:48 2011-07-06 08:24:53 \n",
"freq 1 16 8 \n", "freq 1 16 8 \n",
"\n", "\n",
" status_changed type succeeds commentary \\\n", " status_changed type succeeds commentary \\\n",
"count 5375 5375 107 0 \n", "count 5375 5375 107 0 \n",
"unique 4158 12 107 0 \n", "unique 4158 12 107 0 \n",
"top 2010-01-06 13:43:48 institutional 10164 NaN \n", "top 2010-01-06 13:43:48 institutional 12637 NaN \n",
"freq 16 3795 1 NaN \n", "freq 16 3795 1 NaN \n",
"\n", "\n",
" metadata_visibility latitude longitude relation_type relation_uri \\\n", " metadata_visibility latitude longitude relation_type relation_uri \\\n",
@ -1212,11 +1206,11 @@
"top Repositorio Institucional http://virtuelcampus.univ-msila.dz/fll \n", "top Repositorio Institucional http://virtuelcampus.univ-msila.dz/fll \n",
"freq 7 5 \n", "freq 7 5 \n",
"\n", "\n",
" sword_endpoint \\\n", " sword_endpoint \\\n",
"count 176 \n", "count 176 \n",
"unique 170 \n", "unique 170 \n",
"top http://repositorio.autonoma.edu.co/jspui/ \n", "top http://npl.csircentral.net/ \n",
"freq 2 \n", "freq 2 \n",
"\n", "\n",
" rss_feed \\\n", " rss_feed \\\n",
"count 1521 \n", "count 1521 \n",
@ -1224,11 +1218,11 @@
"top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n", "top http://eprints.upnjatim.ac.id/cgi/latest_tool?... \n",
"freq 5 \n", "freq 5 \n",
"\n", "\n",
" twitter_feed \\\n", " twitter_feed \\\n",
"count 115 \n", "count 115 \n",
"unique 111 \n", "unique 111 \n",
"top http://cdigital.uv.mx/feed/rss_1.0/site \n", "top https://twitter.com/rpsicomdp?lang=es \n",
"freq 2 \n", "freq 2 \n",
"\n", "\n",
" description fulltext open_access mandate \\\n", " description fulltext open_access mandate \\\n",
"count 3782 4127 4127 3676 \n", "count 3782 4127 4127 3676 \n",
@ -1257,7 +1251,7 @@
" note \\\n", " note \\\n",
"count 215 \n", "count 215 \n",
"unique 207 \n", "unique 207 \n",
"top CSIR-The National Physical Laboratory(NPL) is ... \n", "top ¿Quién puede depositar documentos en el reposi... \n",
"freq 2 \n", "freq 2 \n",
"\n", "\n",
" suggestions activity_low \\\n", " suggestions activity_low \\\n",
@ -1293,7 +1287,7 @@
" webometrics_rank webometrics_size webometrics_visibility \\\n", " webometrics_rank webometrics_size webometrics_visibility \\\n",
"count 148 148 148 \n", "count 148 148 148 \n",
"unique 148 148 148 \n", "unique 148 148 148 \n",
"top 16 65 211 \n", "top 367 738 362 \n",
"freq 1 1 1 \n", "freq 1 1 1 \n",
"\n", "\n",
" webometrics_rich_files webometrics_scholar \\\n", " webometrics_rich_files webometrics_scholar \\\n",
@ -2407,7 +2401,7 @@
" <td>{nan, 1}</td>\n", " <td>{nan, 1}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{disk0/00/00/00/01, nan}</td>\n", " <td>{nan, disk0/00/00/00/01}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n", " <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, 2011-07-18 05:40:07}</td>\n", " <td>{nan, 2011-07-18 05:40:07}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n", " <td>{nan, 2010-01-06 13:43:48}</td>\n",
@ -2432,7 +2426,7 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, http://archivesic.ccsd.cnrs.fr/}</td>\n", " <td>{nan, http://archivesic.ccsd.cnrs.fr/}</td>\n",
" <td>{nan, @RCHIVESIC }</td>\n", " <td>{@RCHIVESIC , nan}</td>\n",
" <td>{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}</td>\n", " <td>{nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
@ -2448,7 +2442,7 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, hal}</td>\n", " <td>{nan, hal}</td>\n",
" <td>{nan, geoname_2_FR}</td>\n", " <td>{geoname_2_FR, nan}</td>\n",
" <td>{nan, other}</td>\n", " <td>{nan, other}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, 2002-05-17 19:24:41}</td>\n", " <td>{nan, 2002-05-17 19:24:41}</td>\n",
@ -2458,12 +2452,12 @@
" <td>{nan, 0}</td>\n", " <td>{nan, 0}</td>\n",
" <td>{nan, 0}</td>\n", " <td>{nan, 0}</td>\n",
" <td>{nan, 25}</td>\n", " <td>{nan, 25}</td>\n",
" <td>{nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n", " <td>{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0...</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{opendoar, celestial}</td>\n", " <td>{celestial, opendoar}</td>\n",
" <td>{58, 669}</td>\n", " <td>{58, 669}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
@ -2486,7 +2480,7 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, disk0/00/00/00/10}</td>\n", " <td>{nan, disk0/00/00/00/10}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n", " <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{2011-07-18 05:40:13, nan}</td>\n", " <td>{nan, 2011-07-18 05:40:13}</td>\n",
" <td>{nan, 2010-01-06 13:43:48}</td>\n", " <td>{nan, 2010-01-06 13:43:48}</td>\n",
" <td>{nan, institutional}</td>\n", " <td>{nan, institutional}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
@ -2510,20 +2504,20 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, http://www.diva-portal.org/mdh/}</td>\n", " <td>{nan, http://www.diva-portal.org/mdh/}</td>\n",
" <td>{nan, Academic Archive On-line (Mälardalen Uni...</td>\n", " <td>{nan, Academic Archive On-line (Mälardalen Uni...</td>\n",
" <td>{http://www.diva-portal.org/oai/mdh/OAI, nan}</td>\n", " <td>{nan, http://www.diva-portal.org/oai/mdh/OAI}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{TRUE, nan}</td>\n", " <td>{nan, TRUE}</td>\n",
" <td>{TRUE, nan}</td>\n", " <td>{nan, TRUE}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, se}</td>\n", " <td>{nan, se}</td>\n",
" <td>{nan, Uppsala}</td>\n", " <td>{nan, Uppsala}</td>\n",
" <td>{nan, 59.8667}</td>\n", " <td>{nan, 59.8667}</td>\n",
" <td>{17.6333, nan}</td>\n", " <td>{nan, 17.6333}</td>\n",
" <td>{nan, diva}</td>\n", " <td>{nan, diva}</td>\n",
" <td>{nan, geoname_2_SE}</td>\n", " <td>{nan, geoname_2_SE}</td>\n",
" <td>{nan, other}</td>\n", " <td>{nan, other}</td>\n",
@ -2540,8 +2534,8 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{opendoar, celestial}</td>\n", " <td>{celestial, opendoar}</td>\n",
" <td>{526, 258}</td>\n", " <td>{258, 526}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
@ -2662,18 +2656,18 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n", " <td>{http://edoc.sub.uni-hamburg.de/klimawandel/, ...</td>\n",
" <td>{nan, Klimawandel Dokumentenserver}</td>\n", " <td>{nan, Klimawandel Dokumentenserver}</td>\n",
" <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n", " <td>{nan, http://edoc.sub.uni-hamburg.de/klimawand...</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, The \"Documentenserver Klimawandel\" (Repo...</td>\n", " <td>{nan, The \"Documentenserver Klimawandel\" (Repo...</td>\n",
" <td>{TRUE, nan}</td>\n", " <td>{nan, TRUE}</td>\n",
" <td>{TRUE, nan}</td>\n", " <td>{nan, TRUE}</td>\n",
" <td>{TRUE, nan}</td>\n", " <td>{nan, TRUE}</td>\n",
" <td>{nan, KLIMZUG projects, Climate Service Center...</td>\n", " <td>{nan, KLIMZUG projects, Helmholtz-Zentrum Gees...</td>\n",
" <td>{nan, http://www.climateservicecenter.de/, htt...</td>\n", " <td>{http://www.climateservicecenter.de/, nan, htt...</td>\n",
" <td>{nan, de}</td>\n", " <td>{nan, de}</td>\n",
" <td>{nan, Hamburg}</td>\n", " <td>{nan, Hamburg}</td>\n",
" <td>{nan, 53.5511}</td>\n", " <td>{nan, 53.5511}</td>\n",
@ -2681,7 +2675,7 @@
" <td>{nan, opus}</td>\n", " <td>{nan, opus}</td>\n",
" <td>{nan, geoname_2_DE}</td>\n", " <td>{nan, geoname_2_DE}</td>\n",
" <td>{nan, other}</td>\n", " <td>{nan, other}</td>\n",
" <td>{G1, HD, GE, GF, S1}</td>\n", " <td>{S1, HD, GF, GE, G1}</td>\n",
" <td>{nan, 2015-07-02 08:08:31}</td>\n", " <td>{nan, 2015-07-02 08:08:31}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
@ -2694,8 +2688,8 @@
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan, opendoar, celestial}</td>\n", " <td>{nan, celestial, opendoar}</td>\n",
" <td>{nan, 3408, 5881}</td>\n", " <td>{3408, 5881, nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
" <td>{nan}</td>\n", " <td>{nan}</td>\n",
@ -2800,7 +2794,7 @@
"\n", "\n",
" dir datestamp \\\n", " dir datestamp \\\n",
"eprintid \n", "eprintid \n",
"1 {disk0/00/00/00/01, nan} {nan, 2010-01-06 13:43:48} \n", "1 {nan, disk0/00/00/00/01} {nan, 2010-01-06 13:43:48} \n",
"10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n", "10 {nan, disk0/00/00/00/10} {nan, 2010-01-06 13:43:48} \n",
"1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n", "1000 {disk0/00/00/10/00} {2010-01-06 13:45:01} \n",
"10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n", "10001 {nan, disk0/00/01/00/01} {nan, 2015-08-08 14:52:11} \n",
@ -2809,7 +2803,7 @@
" lastmod status_changed \\\n", " lastmod status_changed \\\n",
"eprintid \n", "eprintid \n",
"1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n", "1 {nan, 2011-07-18 05:40:07} {nan, 2010-01-06 13:43:48} \n",
"10 {2011-07-18 05:40:13, nan} {nan, 2010-01-06 13:43:48} \n", "10 {nan, 2011-07-18 05:40:13} {nan, 2010-01-06 13:43:48} \n",
"1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n", "1000 {2011-07-06 08:21:21} {2010-01-06 13:45:01} \n",
"10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n", "10001 {nan, 2016-03-21 19:44:01} {nan, 2015-08-08 14:52:11} \n",
"10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n", "10008 {2016-03-21 19:43:51} {2015-08-08 14:52:26} \n",
@ -2859,12 +2853,12 @@
"1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n", "1 {nan} {nan, http://archivesic.ccsd.cnrs.fr/} \n",
"10 {nan} {nan, http://www.diva-portal.org/mdh/} \n", "10 {nan} {nan, http://www.diva-portal.org/mdh/} \n",
"1000 {nan} {http://pam.pisharp.org/} \n", "1000 {nan} {http://pam.pisharp.org/} \n",
"10001 {nan} {nan, http://edoc.sub.uni-hamburg.de/klimawand... \n", "10001 {nan} {http://edoc.sub.uni-hamburg.de/klimawandel/, ... \n",
"10008 {nan} {http://creativematter.skidmore.edu/} \n", "10008 {nan} {http://creativematter.skidmore.edu/} \n",
"\n", "\n",
" title \\\n", " title \\\n",
"eprintid \n", "eprintid \n",
"1 {nan, @RCHIVESIC } \n", "1 {@RCHIVESIC , nan} \n",
"10 {nan, Academic Archive On-line (Mälardalen Uni... \n", "10 {nan, Academic Archive On-line (Mälardalen Uni... \n",
"1000 {PAM - Portuguese Archive of Mathematics} \n", "1000 {PAM - Portuguese Archive of Mathematics} \n",
"10001 {nan, Klimawandel Dokumentenserver} \n", "10001 {nan, Klimawandel Dokumentenserver} \n",
@ -2873,7 +2867,7 @@
" oai_pmh sword_endpoint \\\n", " oai_pmh sword_endpoint \\\n",
"eprintid \n", "eprintid \n",
"1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n", "1 {nan, http://archivesic.ccsd.cnrs.fr/oai/oai.php} {nan} \n",
"10 {http://www.diva-portal.org/oai/mdh/OAI, nan} {nan} \n", "10 {nan, http://www.diva-portal.org/oai/mdh/OAI} {nan} \n",
"1000 {nan} {nan} \n", "1000 {nan} {nan} \n",
"10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n", "10001 {nan, http://edoc.sub.uni-hamburg.de/klimawand... {nan} \n",
"10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n", "10008 {http://creativematter.skidmore.edu/do/oai/} {nan} \n",
@ -2889,17 +2883,17 @@
" description fulltext \\\n", " description fulltext \\\n",
"eprintid \n", "eprintid \n",
"1 {nan} {nan} \n", "1 {nan} {nan} \n",
"10 {nan} {TRUE, nan} \n", "10 {nan} {nan, TRUE} \n",
"1000 {nan} {TRUE} \n", "1000 {nan} {TRUE} \n",
"10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {TRUE, nan} \n", "10001 {nan, The \"Documentenserver Klimawandel\" (Repo... {nan, TRUE} \n",
"10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n", "10008 {Welcome to Creative Matter, a repository for ... {TRUE} \n",
"\n", "\n",
" open_access mandate \\\n", " open_access mandate \\\n",
"eprintid \n", "eprintid \n",
"1 {nan} {nan} \n", "1 {nan} {nan} \n",
"10 {TRUE, nan} {nan} \n", "10 {nan, TRUE} {nan} \n",
"1000 {TRUE} {nan} \n", "1000 {TRUE} {nan} \n",
"10001 {TRUE, nan} {TRUE, nan} \n", "10001 {nan, TRUE} {nan, TRUE} \n",
"10008 {FALSE} {FALSE} \n", "10008 {FALSE} {FALSE} \n",
"\n", "\n",
" organisation_title \\\n", " organisation_title \\\n",
@ -2907,7 +2901,7 @@
"1 {nan} \n", "1 {nan} \n",
"10 {nan} \n", "10 {nan} \n",
"1000 {nan} \n", "1000 {nan} \n",
"10001 {nan, KLIMZUG projects, Climate Service Center... \n", "10001 {nan, KLIMZUG projects, Helmholtz-Zentrum Gees... \n",
"10008 {Skidmore College} \n", "10008 {Skidmore College} \n",
"\n", "\n",
" organisation_home_page location_country \\\n", " organisation_home_page location_country \\\n",
@ -2915,20 +2909,20 @@
"1 {nan} {nan, fr} \n", "1 {nan} {nan, fr} \n",
"10 {nan} {nan, se} \n", "10 {nan} {nan, se} \n",
"1000 {nan} {pt} \n", "1000 {nan} {pt} \n",
"10001 {nan, http://www.climateservicecenter.de/, htt... {nan, de} \n", "10001 {http://www.climateservicecenter.de/, nan, htt... {nan, de} \n",
"10008 {http://www.skidmore.edu/} {us} \n", "10008 {http://www.skidmore.edu/} {us} \n",
"\n", "\n",
" location_city location_latitude location_longitude \\\n", " location_city location_latitude location_longitude \\\n",
"eprintid \n", "eprintid \n",
"1 {nan} {nan} {nan} \n", "1 {nan} {nan} {nan} \n",
"10 {nan, Uppsala} {nan, 59.8667} {17.6333, nan} \n", "10 {nan, Uppsala} {nan, 59.8667} {nan, 17.6333} \n",
"1000 {Bellevue, WA} {47.6034} {-122.155} \n", "1000 {Bellevue, WA} {47.6034} {-122.155} \n",
"10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n", "10001 {nan, Hamburg} {nan, 53.5511} {nan, 9.9937} \n",
"10008 {Saratoga Springs} {43.0961} {-73.7818} \n", "10008 {Saratoga Springs} {43.0961} {-73.7818} \n",
"\n", "\n",
" software geoname version \\\n", " software geoname version \\\n",
"eprintid \n", "eprintid \n",
"1 {nan, hal} {nan, geoname_2_FR} {nan, other} \n", "1 {nan, hal} {geoname_2_FR, nan} {nan, other} \n",
"10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n", "10 {nan, diva} {nan, geoname_2_SE} {nan, other} \n",
"1000 {dspace} {geoname_2_PT} {other} \n", "1000 {dspace} {geoname_2_PT} {other} \n",
"10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n", "10001 {nan, opus} {nan, geoname_2_DE} {nan, other} \n",
@ -2939,7 +2933,7 @@
"1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n", "1 {nan} {nan, 2002-05-17 19:24:41} {nan} {nan} \n",
"10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n", "10 {nan} {nan, 2005-12-08 13:15:22} {nan} {nan} \n",
"1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n", "1000 {nan} {2006-05-04 10:48:14} {nan} {nan} \n",
"10001 {G1, HD, GE, GF, S1} {nan, 2015-07-02 08:08:31} {nan} {nan} \n", "10001 {S1, HD, GF, GE, G1} {nan, 2015-07-02 08:08:31} {nan} {nan} \n",
"10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n", "10008 {nan} {2015-07-06 17:35:50} {nan} {nan} \n",
"\n", "\n",
" activity_low activity_medium activity_high recordcount \\\n", " activity_low activity_medium activity_high recordcount \\\n",
@ -2952,7 +2946,7 @@
"\n", "\n",
" recordhistory fulltexts_total \\\n", " recordhistory fulltexts_total \\\n",
"eprintid \n", "eprintid \n",
"1 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,... {nan} \n", "1 {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0... {nan} \n",
"10 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1... {nan} \n", "10 {nan, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,8,39,100,1... {nan} \n",
"1000 {nan} {nan} \n", "1000 {nan} {nan} \n",
"10001 {nan} {nan} \n", "10001 {nan} {nan} \n",
@ -2968,10 +2962,10 @@
"\n", "\n",
" registry_name registry_id submit_to \\\n", " registry_name registry_id submit_to \\\n",
"eprintid \n", "eprintid \n",
"1 {opendoar, celestial} {58, 669} {nan} \n", "1 {celestial, opendoar} {58, 669} {nan} \n",
"10 {opendoar, celestial} {526, 258} {nan} \n", "10 {celestial, opendoar} {258, 526} {nan} \n",
"1000 {nan} {nan} {nan} \n", "1000 {nan} {nan} {nan} \n",
"10001 {nan, opendoar, celestial} {nan, 3408, 5881} {nan} \n", "10001 {nan, celestial, opendoar} {3408, 5881, nan} {nan} \n",
"10008 {celestial} {5882} {nan} \n", "10008 {celestial} {5882} {nan} \n",
"\n", "\n",
" submitted_to_name submitted_to_done webometrics_rank \\\n", " submitted_to_name submitted_to_done webometrics_rank \\\n",
@ -3011,7 +3005,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -3252,7 +3246,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n", " <td>[celestial, opendoar]</td>\n",
" <td>[58, 669]</td>\n", " <td>[58, 669]</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -3329,8 +3323,8 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n", " <td>[celestial, opendoar]</td>\n",
" <td>[526, 258]</td>\n", " <td>[258, 526]</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -3461,7 +3455,7 @@
" <td>TRUE</td>\n", " <td>TRUE</td>\n",
" <td>TRUE</td>\n", " <td>TRUE</td>\n",
" <td>TRUE</td>\n", " <td>TRUE</td>\n",
" <td>[KLIMZUG projects, Climate Service Center 2.0,...</td>\n", " <td>[KLIMZUG projects, Helmholtz-Zentrum Geesthach...</td>\n",
" <td>[http://www.climateservicecenter.de/, http://w...</td>\n", " <td>[http://www.climateservicecenter.de/, http://w...</td>\n",
" <td>de</td>\n", " <td>de</td>\n",
" <td>Hamburg</td>\n", " <td>Hamburg</td>\n",
@ -3470,7 +3464,7 @@
" <td>opus</td>\n", " <td>opus</td>\n",
" <td>geoname_2_DE</td>\n", " <td>geoname_2_DE</td>\n",
" <td>other</td>\n", " <td>other</td>\n",
" <td>[G1, GF, HD, GE, S1]</td>\n", " <td>[S1, GE, HD, GF, G1]</td>\n",
" <td>2015-07-02 08:08:31</td>\n", " <td>2015-07-02 08:08:31</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -3483,7 +3477,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>[opendoar, celestial]</td>\n", " <td>[celestial, opendoar]</td>\n",
" <td>[3408, 5881]</td>\n", " <td>[3408, 5881]</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -3688,7 +3682,7 @@
"1 NaN \n", "1 NaN \n",
"10 NaN \n", "10 NaN \n",
"1000 NaN \n", "1000 NaN \n",
"10001 [KLIMZUG projects, Climate Service Center 2.0,... \n", "10001 [KLIMZUG projects, Helmholtz-Zentrum Geesthach... \n",
"10008 Skidmore College \n", "10008 Skidmore College \n",
"\n", "\n",
" organisation_home_page location_country \\\n", " organisation_home_page location_country \\\n",
@ -3712,7 +3706,7 @@
"1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n", "1 geoname_2_FR other NaN 2002-05-17 19:24:41 \n",
"10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n", "10 geoname_2_SE other NaN 2005-12-08 13:15:22 \n",
"1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n", "1000 geoname_2_PT other NaN 2006-05-04 10:48:14 \n",
"10001 geoname_2_DE other [G1, GF, HD, GE, S1] 2015-07-02 08:08:31 \n", "10001 geoname_2_DE other [S1, GE, HD, GF, G1] 2015-07-02 08:08:31 \n",
"10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n", "10008 geoname_2_US other NaN 2015-07-06 17:35:50 \n",
"\n", "\n",
" note suggestions activity_low activity_medium activity_high \\\n", " note suggestions activity_low activity_medium activity_high \\\n",
@ -3741,10 +3735,10 @@
"\n", "\n",
" registry_name registry_id submit_to submitted_to_name \\\n", " registry_name registry_id submit_to submitted_to_name \\\n",
"eprintid \n", "eprintid \n",
"1 [opendoar, celestial] [58, 669] NaN NaN \n", "1 [celestial, opendoar] [58, 669] NaN NaN \n",
"10 [opendoar, celestial] [526, 258] NaN NaN \n", "10 [celestial, opendoar] [258, 526] NaN NaN \n",
"1000 NaN NaN NaN NaN \n", "1000 NaN NaN NaN NaN \n",
"10001 [opendoar, celestial] [3408, 5881] NaN NaN \n", "10001 [celestial, opendoar] [3408, 5881] NaN NaN \n",
"10008 celestial 5882 NaN NaN \n", "10008 celestial 5882 NaN NaN \n",
"\n", "\n",
" submitted_to_done webometrics_rank webometrics_size \\\n", " submitted_to_done webometrics_rank webometrics_size \\\n",
@ -3772,7 +3766,7 @@
"10008 NaN NaN NaN " "10008 NaN NaN NaN "
] ]
}, },
"execution_count": 9, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -3793,7 +3787,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -3959,7 +3953,7 @@
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>[opendoar, roarmap, celestial]</td>\n", " <td>[celestial, roarmap, opendoar]</td>\n",
" <td>[69, 166, 1106]</td>\n", " <td>[69, 166, 1106]</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -4027,7 +4021,7 @@
"4188 NaN NaN NaN \n", "4188 NaN NaN NaN \n",
"\n", "\n",
" registry_name registry_id submit_to \\\n", " registry_name registry_id submit_to \\\n",
"4188 [opendoar, roarmap, celestial] [69, 166, 1106] NaN \n", "4188 [celestial, roarmap, opendoar] [69, 166, 1106] NaN \n",
"\n", "\n",
" submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n", " submitted_to_name submitted_to_done webometrics_rank webometrics_size \\\n",
"4188 NaN NaN 1 6 \n", "4188 NaN NaN 1 6 \n",
@ -4042,7 +4036,7 @@
"4188 NaN " "4188 NaN "
] ]
}, },
"execution_count": 10, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -4054,7 +4048,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -4277,7 +4271,7 @@
" <td>2</td>\n", " <td>2</td>\n",
" <td>2</td>\n", " <td>2</td>\n",
" <td>3802</td>\n", " <td>3802</td>\n",
" <td>3772</td>\n", " <td>3771</td>\n",
" <td>143</td>\n", " <td>143</td>\n",
" <td>1861</td>\n", " <td>1861</td>\n",
" <td>2887</td>\n", " <td>2887</td>\n",
@ -4299,7 +4293,7 @@
" <td>134</td>\n", " <td>134</td>\n",
" <td>117</td>\n", " <td>117</td>\n",
" <td>7</td>\n", " <td>7</td>\n",
" <td>4255</td>\n", " <td>4257</td>\n",
" <td>7</td>\n", " <td>7</td>\n",
" <td>1</td>\n", " <td>1</td>\n",
" <td>1</td>\n", " <td>1</td>\n",
@ -4314,18 +4308,18 @@
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>top</th>\n", " <th>top</th>\n",
" <td>10202</td>\n", " <td>1259</td>\n",
" <td>11</td>\n", " <td>11</td>\n",
" <td>archive</td>\n", " <td>archive</td>\n",
" <td>1</td>\n", " <td>1</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>disk0/00/01/46/73</td>\n", " <td>disk0/00/00/14/07</td>\n",
" <td>2010-01-06 13:43:48</td>\n", " <td>2010-01-06 13:43:48</td>\n",
" <td>2011-07-06 08:24:53</td>\n", " <td>2016-05-02 05:43:04</td>\n",
" <td>2010-01-06 13:43:48</td>\n", " <td>2010-01-06 13:43:48</td>\n",
" <td>institutional</td>\n", " <td>institutional</td>\n",
" <td>10164</td>\n", " <td>12637</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
" <td>show</td>\n", " <td>show</td>\n",
" <td>NaN</td>\n", " <td>NaN</td>\n",
@ -4347,7 +4341,7 @@
" <td>http://eprints.upnjatim.ac.id/</td>\n", " <td>http://eprints.upnjatim.ac.id/</td>\n",
" <td>Repositorio Institucional</td>\n", " <td>Repositorio Institucional</td>\n",
" <td>http://kce.docressources.info/ws/PMBWs_2</td>\n", " <td>http://kce.docressources.info/ws/PMBWs_2</td>\n",
" <td>http://my.indexcopernicus.com/fredemoreno</td>\n", " <td>http://npl.csircentral.net/</td>\n",
" <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n", " <td>http://eprints.upnjatim.ac.id/cgi/latest_tool?...</td>\n",
" <td>http://twitter.com/bu_ufsc</td>\n", " <td>http://twitter.com/bu_ufsc</td>\n",
" <td>info:other:archives.eprints.org:import</td>\n", " <td>info:other:archives.eprints.org:import</td>\n",
@ -4365,7 +4359,7 @@
" <td>other</td>\n", " <td>other</td>\n",
" <td>K1</td>\n", " <td>K1</td>\n",
" <td>2006-05-04 10:48:14</td>\n", " <td>2006-05-04 10:48:14</td>\n",
" <td>CSIR-The National Physical Laboratory(NPL) is ...</td>\n", " <td>DSpace@Işık is a growing collection of Işık Un...</td>\n",
" <td>This repository is hosted by the Texas Digital...</td>\n", " <td>This repository is hosted by the Texas Digital...</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
@ -4376,14 +4370,14 @@
" <td>0</td>\n", " <td>0</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>0</td>\n", " <td>0</td>\n",
" <td>[opendoar, celestial]</td>\n", " <td>[celestial, opendoar]</td>\n",
" <td>2479</td>\n", " <td>[2246, 1879]</td>\n",
" <td>[opendoar, roarmap, celestial]</td>\n", " <td>[celestial, roarmap, opendoar]</td>\n",
" <td>opendoar</td>\n", " <td>opendoar</td>\n",
" <td>2021-01-25</td>\n", " <td>2021-01-25</td>\n",
" <td>551</td>\n", " <td>367</td>\n",
" <td>274</td>\n", " <td>738</td>\n",
" <td>211</td>\n", " <td>668</td>\n",
" <td>824</td>\n", " <td>824</td>\n",
" <td>806</td>\n", " <td>806</td>\n",
" <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n", " <td>0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...</td>\n",
@ -5022,7 +5016,7 @@
" eprintid rev_number eprint_status userid importid source \\\n", " eprintid rev_number eprint_status userid importid source \\\n",
"count 5375 5375 5375 5375 0.0 0.0 \n", "count 5375 5375 5375 5375 0.0 0.0 \n",
"unique 5375 658 1 2135 NaN NaN \n", "unique 5375 658 1 2135 NaN NaN \n",
"top 10202 11 archive 1 NaN NaN \n", "top 1259 11 archive 1 NaN NaN \n",
"freq 1 332 5375 1333 NaN NaN \n", "freq 1 332 5375 1333 NaN NaN \n",
"mean NaN NaN NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN NaN NaN \n", "std NaN NaN NaN NaN NaN NaN \n",
@ -5035,7 +5029,7 @@
" dir datestamp lastmod \\\n", " dir datestamp lastmod \\\n",
"count 5375 5375 5375 \n", "count 5375 5375 5375 \n",
"unique 5375 4127 3966 \n", "unique 5375 4127 3966 \n",
"top disk0/00/01/46/73 2010-01-06 13:43:48 2011-07-06 08:24:53 \n", "top disk0/00/00/14/07 2010-01-06 13:43:48 2016-05-02 05:43:04 \n",
"freq 1 16 8 \n", "freq 1 16 8 \n",
"mean NaN NaN NaN \n", "mean NaN NaN NaN \n",
"std NaN NaN NaN \n", "std NaN NaN NaN \n",
@ -5048,7 +5042,7 @@
" status_changed type succeeds commentary \\\n", " status_changed type succeeds commentary \\\n",
"count 5375 5375 107 0.0 \n", "count 5375 5375 107 0.0 \n",
"unique 4158 12 107 NaN \n", "unique 4158 12 107 NaN \n",
"top 2010-01-06 13:43:48 institutional 10164 NaN \n", "top 2010-01-06 13:43:48 institutional 12637 NaN \n",
"freq 16 3795 1 NaN \n", "freq 16 3795 1 NaN \n",
"mean NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n",
@ -5149,18 +5143,18 @@
"75% NaN NaN \n", "75% NaN NaN \n",
"max NaN NaN \n", "max NaN NaN \n",
"\n", "\n",
" sword_endpoint \\\n", " sword_endpoint \\\n",
"count 176 \n", "count 176 \n",
"unique 170 \n", "unique 170 \n",
"top http://my.indexcopernicus.com/fredemoreno \n", "top http://npl.csircentral.net/ \n",
"freq 2 \n", "freq 2 \n",
"mean NaN \n", "mean NaN \n",
"std NaN \n", "std NaN \n",
"min NaN \n", "min NaN \n",
"25% NaN \n", "25% NaN \n",
"50% NaN \n", "50% NaN \n",
"75% NaN \n", "75% NaN \n",
"max NaN \n", "max NaN \n",
"\n", "\n",
" rss_feed \\\n", " rss_feed \\\n",
"count 1521 \n", "count 1521 \n",
@ -5203,7 +5197,7 @@
"\n", "\n",
" organisation_home_page location_country location_city \\\n", " organisation_home_page location_country location_city \\\n",
"count 4226 5080 3655 \n", "count 4226 5080 3655 \n",
"unique 3772 143 1861 \n", "unique 3771 143 1861 \n",
"top http://www.cas.cn/ us Lima \n", "top http://www.cas.cn/ us Lima \n",
"freq 9 886 69 \n", "freq 9 886 69 \n",
"mean NaN NaN NaN \n", "mean NaN NaN NaN \n",
@ -5243,7 +5237,7 @@
" note \\\n", " note \\\n",
"count 215 \n", "count 215 \n",
"unique 207 \n", "unique 207 \n",
"top CSIR-The National Physical Laboratory(NPL) is ... \n", "top DSpace@Işık is a growing collection of Işık Un... \n",
"freq 2 \n", "freq 2 \n",
"mean NaN \n", "mean NaN \n",
"std NaN \n", "std NaN \n",
@ -5295,7 +5289,7 @@
" fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n", " fulltexts_docs fulltexts_rtotal fulltexts_rdocs registry_name \\\n",
"count 258 270 258 4603 \n", "count 258 270 258 4603 \n",
"unique 118 134 117 7 \n", "unique 118 134 117 7 \n",
"top 0 0 0 [opendoar, celestial] \n", "top 0 0 0 [celestial, opendoar] \n",
"freq 114 113 114 2114 \n", "freq 114 113 114 2114 \n",
"mean NaN NaN NaN NaN \n", "mean NaN NaN NaN NaN \n",
"std NaN NaN NaN NaN \n", "std NaN NaN NaN NaN \n",
@ -5305,23 +5299,23 @@
"75% NaN NaN NaN NaN \n", "75% NaN NaN NaN NaN \n",
"max NaN NaN NaN NaN \n", "max NaN NaN NaN NaN \n",
"\n", "\n",
" registry_id submit_to submitted_to_name \\\n", " registry_id submit_to submitted_to_name \\\n",
"count 4578 293 205 \n", "count 4578 293 205 \n",
"unique 4255 7 1 \n", "unique 4257 7 1 \n",
"top 2479 [opendoar, roarmap, celestial] opendoar \n", "top [2246, 1879] [celestial, roarmap, opendoar] opendoar \n",
"freq 4 92 205 \n", "freq 4 92 205 \n",
"mean NaN NaN NaN \n", "mean NaN NaN NaN \n",
"std NaN NaN NaN \n", "std NaN NaN NaN \n",
"min NaN NaN NaN \n", "min NaN NaN NaN \n",
"25% NaN NaN NaN \n", "25% NaN NaN NaN \n",
"50% NaN NaN NaN \n", "50% NaN NaN NaN \n",
"75% NaN NaN NaN \n", "75% NaN NaN NaN \n",
"max NaN NaN NaN \n", "max NaN NaN NaN \n",
"\n", "\n",
" submitted_to_done webometrics_rank webometrics_size \\\n", " submitted_to_done webometrics_rank webometrics_size \\\n",
"count 205 148 148 \n", "count 205 148 148 \n",
"unique 1 148 148 \n", "unique 1 148 148 \n",
"top 2021-01-25 551 274 \n", "top 2021-01-25 367 738 \n",
"freq 205 1 1 \n", "freq 205 1 1 \n",
"mean NaN NaN NaN \n", "mean NaN NaN NaN \n",
"std NaN NaN NaN \n", "std NaN NaN NaN \n",
@ -5334,7 +5328,7 @@
" webometrics_visibility webometrics_rich_files webometrics_scholar \\\n", " webometrics_visibility webometrics_rich_files webometrics_scholar \\\n",
"count 148 148 148 \n", "count 148 148 148 \n",
"unique 148 146 143 \n", "unique 148 146 143 \n",
"top 211 824 806 \n", "top 668 824 806 \n",
"freq 1 3 5 \n", "freq 1 3 5 \n",
"mean NaN NaN NaN \n", "mean NaN NaN NaN \n",
"std NaN NaN NaN \n", "std NaN NaN NaN \n",
@ -5371,7 +5365,7 @@
"max NaN " "max NaN "
] ]
}, },
"execution_count": 12, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -5382,7 +5376,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 28, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -5431,7 +5425,7 @@
"dtype: int64" "dtype: int64"
] ]
}, },
"execution_count": 28, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -5442,7 +5436,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -5486,7 +5480,7 @@
"dtype: int64" "dtype: int64"
] ]
}, },
"execution_count": 29, "execution_count": 12,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -5495,6 +5489,68 @@
"roar_df.isna().sum()[40:]" "roar_df.isna().sum()[40:]"
] ]
}, },
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'TRUE', 'FALSE'], dtype=object)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.open_access.unique()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['subject', 'institutional', 'researchdata', 'theses', 'database',\n",
" 'other', 'journal', 'opendata', 'demonstration', 'multi',\n",
" 'learning', 'webobservatory'], dtype=object)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.type.unique()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([nan, 'TRUE', 'FALSE'], dtype=object)"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"roar_df.mandate.unique()"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,

View File

@ -9,16 +9,10 @@
"import ast\n", "import ast\n",
"import csv\n", "import csv\n",
"import json\n", "import json\n",
"import reverse_geocoder as rg\n",
"\n", "\n",
"import numpy as np\n", "import numpy as np\n",
"import pandas as pd\n", "import pandas as pd\n",
"\n", "\n",
"import pycountry_convert\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib_venn import venn2, venn2_circles\n",
"\n",
"import plotly\n", "import plotly\n",
"from plotly.offline import iplot, init_notebook_mode\n", "from plotly.offline import iplot, init_notebook_mode\n",
"import plotly.graph_objs as go\n", "import plotly.graph_objs as go\n",
@ -222,10 +216,10 @@
" </tr>\n", " </tr>\n",
" <tr>\n", " <tr>\n",
" <th>top</th>\n", " <th>top</th>\n",
" <td>GBIF France IPT - GBIF France</td>\n", " <td>Brassica Information Portal</td>\n",
" <td>CGD</td>\n", " <td>CGD</td>\n",
" <td>https://fairsharing.org/10.25504/FAIRsharing.w...</td>\n", " <td>https://fairsharing.org/10.25504/FAIRsharing.e...</td>\n",
" <td>http://www.ebi.ac.uk/merops/</td>\n", " <td>http://web.iodp.tamu.edu/LORE/</td>\n",
" <td>United States</td>\n", " <td>United States</td>\n",
" <td>Life Science</td>\n", " <td>Life Science</td>\n",
" </tr>\n", " </tr>\n",
@ -243,23 +237,23 @@
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" full_name short_name \\\n", " full_name short_name \\\n",
"count 1752 1752 \n", "count 1752 1752 \n",
"unique 1752 1741 \n", "unique 1752 1741 \n",
"top GBIF France IPT - GBIF France CGD \n", "top Brassica Information Portal CGD \n",
"freq 1 3 \n", "freq 1 3 \n",
"\n", "\n",
" fs_url \\\n", " fs_url \\\n",
"count 1752 \n", "count 1752 \n",
"unique 1752 \n", "unique 1752 \n",
"top https://fairsharing.org/10.25504/FAIRsharing.w... \n", "top https://fairsharing.org/10.25504/FAIRsharing.e... \n",
"freq 1 \n", "freq 1 \n",
"\n", "\n",
" url countries subjects \n", " url countries subjects \n",
"count 1752 1749 1690 \n", "count 1752 1749 1690 \n",
"unique 1752 178 834 \n", "unique 1752 178 834 \n",
"top http://www.ebi.ac.uk/merops/ United States Life Science \n", "top http://web.iodp.tamu.edu/LORE/ United States Life Science \n",
"freq 1 588 367 " "freq 1 588 367 "
] ]
}, },
"execution_count": 3, "execution_count": 3,

2338
notebooks/03-overlap.ipynb Normal file

File diff suppressed because it is too large Load Diff