{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ast\n", "import csv\n", "import json\n", "import reverse_geocoder as rg\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import plotly\n", "from plotly.offline import iplot, init_notebook_mode\n", "import plotly.graph_objs as go\n", "import plotly.express as px" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# FAIRsharing" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjects
0GenBankGenBankhttps://fairsharing.org/10.25504/FAIRsharing.9...https://www.ncbi.nlm.nih.gov/genbank/[European Union, Japan, United States][Bioinformatics, Data Management, Data Submiss...
1GlycoNAVIGlycoNAVIhttps://fairsharing.org/10.25504/FAIRsharing.w...https://glyconavi.org/[Japan][Chemistry, Glycomics, Life Science, Organic C...
2ADHDgeneADHDgenehttps://fairsharing.org/10.25504/FAIRsharing.m...http://adhd.psych.ac.cn/[China][Biomedical Science, Genetics]
3Allele frequency resource for research and tea...ALFREDhttps://fairsharing.org/10.25504/FAIRsharing.y...http://alfred.med.yale.edu[United States][Life Science]
4Animal Transcription Factor DatabaseAnimalTFDBhttps://fairsharing.org/10.25504/FAIRsharing.e...http://bioinfo.life.hust.edu.cn/AnimalTFDB/[China][Life Science]
\n", "
" ], "text/plain": [ " full_name short_name \\\n", "0 GenBank GenBank \n", "1 GlycoNAVI GlycoNAVI \n", "2 ADHDgene ADHDgene \n", "3 Allele frequency resource for research and tea... ALFRED \n", "4 Animal Transcription Factor Database AnimalTFDB \n", "\n", " fs_url \\\n", "0 https://fairsharing.org/10.25504/FAIRsharing.9... \n", "1 https://fairsharing.org/10.25504/FAIRsharing.w... \n", "2 https://fairsharing.org/10.25504/FAIRsharing.m... \n", "3 https://fairsharing.org/10.25504/FAIRsharing.y... \n", "4 https://fairsharing.org/10.25504/FAIRsharing.e... \n", "\n", " url \\\n", "0 https://www.ncbi.nlm.nih.gov/genbank/ \n", "1 https://glyconavi.org/ \n", "2 http://adhd.psych.ac.cn/ \n", "3 http://alfred.med.yale.edu \n", "4 http://bioinfo.life.hust.edu.cn/AnimalTFDB/ \n", "\n", " countries \\\n", "0 [European Union, Japan, United States] \n", "1 [Japan] \n", "2 [China] \n", "3 [United States] \n", "4 [China] \n", "\n", " subjects \n", "0 [Bioinformatics, Data Management, Data Submiss... \n", "1 [Chemistry, Glycomics, Life Science, Organic C... \n", "2 [Biomedical Science, Genetics] \n", "3 [Life Science] \n", "4 [Life Science] " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df = pd.read_csv('../data/raw/FAIRsharingDBrec_summary20210304.csv', \n", " delimiter='|', header=0,\n", " names=['full_name', 'short_name', 'fs_url', 'url', 'countries', 'subjects'])\n", "fairsharing_df['subjects'] = fairsharing_df.subjects.str.split(pat=',')\n", "fairsharing_df['countries'] = fairsharing_df.countries.str.split(pat=',')\n", "fairsharing_df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
full_nameshort_namefs_urlurlcountriessubjects
count175217521752175217491690
unique1752174117521752178834
topThe Cardiovascular Research GridCGDhttps://fairsharing.org/bsg-d001750http://www.bmrb.wisc.edu/[United States][Life Science]
freq1311588367
\n", "
" ], "text/plain": [ " full_name short_name \\\n", "count 1752 1752 \n", "unique 1752 1741 \n", "top The Cardiovascular Research Grid CGD \n", "freq 1 3 \n", "\n", " fs_url url \\\n", "count 1752 1752 \n", "unique 1752 1752 \n", "top https://fairsharing.org/bsg-d001750 http://www.bmrb.wisc.edu/ \n", "freq 1 1 \n", "\n", " countries subjects \n", "count 1749 1690 \n", "unique 178 834 \n", "top [United States] [Life Science] \n", "freq 588 367 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fairsharing_df.describe()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "Life Science", "Biomedical Science", "Earth Science", "Genomics", "Environmental Science", "Oceanography", "Biodiversity", "Atmospheric Science", "Epidemiology", "Genetics", "Health Science", "Virology", "Biology", "Proteomics", "Bioinformatics", "Agriculture", "Geology", "Preclinical Studies", "Transcriptomics", "Chemistry", "Comparative Genomics", "Data Management", "Clinical Studies", "Botany", "Functional Genomics", "Medicine", "Geophysics", "Meteorology", "Humanities and Social Sciences", "Natural Science", "Social Science", "Systems Biology", "Geography", "Ecology", "Data Submission", " Annotation and Curation", "Metabolomics", "Engineering Science", "Marine Biology", "Physics", "Economics", "Hydrology", "Ontology and Terminology", "Biochemistry", "Astrophysics and Astronomy", "Phylogenetics", "Molecular biology", "Epigenetics", "Medical Virology", "Remote Sensing", "Infectious Disease Medicine", "Immunology", "Humanities", "Anatomy", "Computational Biology", "Structural Biology", "Neurobiology", "Plant Genetics", "Computer Science", "Public Health", "Knowledge and Information Systems", "Microbiology", "Demographics", "Social and Behavioural Science", "Data Visualization", "Oncology", "Developmental Biology", "Critical Care Medicine", "Hydrogeology", "Data Integration", "Glycomics", "Ecosystem Science", "Soil Science", "Geochemistry", "Population Genetics", "Drug Discovery", "Materials Science", "Water Research", "Neuroscience", "Forest Management", "Plant Breeding", "Metagenomics", "Energy Engineering", "Water Management", "Paleontology", "Software Engineering", "Geodesy", "Taxonomy", "Cell Biology", "Phylogenomics", "Immunogenetics", "Pharmacology", "Mineralogy", "Freshwater Science", "Medical Informatics", "Statistics", "Epigenomics", "Human Genetics", "Phylogeny", "Global Health", "Animal Genetics", "Cheminformatics", "Evolutionary Biology", "Zoology", "Mathematics", "Microbial Ecology", "Population Dynamics", "Political Science", "Nanotechnology", "Psychology", "Physical Geography", "Education Science", "Drug Development", "Culture", "Translational Medicine", "Pathology", "Food Security", "Informatics", "Neurophysiology", "Natural History", "Phenomics", "Nutritional Science", "Computational Neuroscience", "Biotechnology", "Bioengineering", "Geoinformatics", "Data Governance", "Cartography", "History", "Analytical Chemistry", "Organic Chemistry", "Urban Planning", "Plant Anatomy", "Enzymology", "Classical Archaeology", "Animal Husbandry", "Maritime Engineering", "Materials Engineering", "Database Management", "Cardiology", "Anthropology", "Architecture", "Transportation Planning", "Criminology", "Primary Health Care", "Molecular Genetics", "Toxicology", "Omics", "Communication Science", "Agronomy", "Physiology", "Art", "Endocrinology", "Fisheries Science", "Economic and Social History", "Drug Metabolism", "Thermodynamics", "Plant Ecology", "Tropical Medicine", "Aerospace Engineering", "Data Quality", "Chemical Engineering", "Data Mining", "Health Services Research", "Linguistics", "Medicinal Chemistry", "Agricultural Engineering", "Geriatric Medicine", "Toxicogenomics", "Drug Repositioning", "Reproductive Health", "Materials Informatics", "Construction Engineering", "Entomology", "Aquaculture", "Pediatrics", "Agroecology", "Civil Engineering", "Inorganic Molecular Chemistry", "Business Administration", "Respiratory Medicine", "Embryology", "Molecular Microbiology", "Power Engineering", "Composite Materials", "Molecular Infection Biology", "Computational Chemistry", "Synthetic Chemistry", "Synthetic Biology", "Building Engineering Physics", "Farming Systems Research", "Biomaterials", "Pharmacy", "Veterinary Medicine", "Gastroenterology", "Structural Genomics", "Pharmacogenomics", "Occupational Medicine", "Community Care", "Molecular Dynamics", "Fine Arts", "Ancient Cultures", "Human Geography", "Molecular Chemistry", "Quantitative Genetics", " Learning and Training", "Human Biology", "Rural and Agricultural Sociology", "Social Policy", "Social Psychology", "Industrial Engineering", "Jurisprudence", "Research on Teaching", "Limnology", "Agricultural Economics", "Historical Linguistics", "Data Security", "Prehistory", "Geotechnics", "Cultural Studies", "Public Finance", "Art History", "Proteogenomics", "Digital Image Processing", "Surgery", "Plant Cell Biology", " Optical and Plasma Physics", "Safety Science", "Traditional Medicine", " Molecular", "Process Engineering", "Rheumatology", "Telecommunication Engineering", "Plastics Engineering", "Acoustics", "Plant Cultivation", "Religious Studies", "Policy", "Systemic Neuroscience", "Agricultural Law", "Technical Chemistry", "Public Law", "Radiology", "Synthesis Chemistry", "Chemical Biology", "Physical Chemistry", "Horticulture", "Hematology", "Gynecology", "Artificial Intelligence", "Atomic", "Behavioural Biology", "Biological Process Engineering", "Functional Materials Research", "Biological Psychology", "Food Process Engineering", "Biomimetic Chemistry", "Biophysics", "Empirical Social Research", "Electrophysiology", "Electrical Engineering", "Biotherapeutics", "Economic Theory", "Economic Policy", "Building Design", "Developmental Neurobiology", "Dermatology", "Criminal Law", "Component Engineering", "Comparative Neurobiology", "Cognitive Neuroscience", "Clinical Veterinary Medicine", "Clinical Psychology", "Clinical Chemistry", "Classical Philology", "Cellular Neuroscience", "History of Science", "Human-Machine Systems Engineering", "Photogrammetry", "Hydraulic Engineering", "Philosophy", "Personalized Medicine", "Parasitology", "Organic Molecular Chemistry", "Ophthalmology", "Obstetrics", "Neurology", "Musculoskeletal Medicine", "Animal Breeding", "Molecular Physical Chemistry", "Molecular Neuroscience", "Microstructural Mechanical Properties of Materials", "Microbial Physiology", "Microbial Genetics", "Metal-Cutting Manufacturing Engineering", "Medicines Research and Development", "Animal Physiology", "Medical Physics", "Media Studies", "Mechanics", "Mechanical Process Engineering", "Mechanical Engineering", "Materials Structuring and Functionalisation", "Applied Linguistics", "Logistics Engineering", "Literary Studies", "Applied Mathematics", "Landscape Planning", "Applied Microbiology", "Mechanical Behaviour of Construction Materials" ], "y": [ 900, 252, 227, 166, 134, 95, 80, 78, 75, 73, 67, 66, 65, 61, 60, 58, 51, 48, 48, 46, 46, 45, 45, 44, 42, 42, 41, 40, 39, 38, 36, 35, 33, 32, 31, 31, 30, 29, 27, 26, 26, 25, 25, 25, 25, 20, 18, 18, 18, 18, 18, 18, 18, 18, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 12, 11, 11, 11, 11, 11, 11, 10, 10, 10, 9, 9, 9, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Fairsharing subject coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fairsharing_subjects = fairsharing_df.explode('subjects').groupby('subjects')[['url']].count().sort_values('url', ascending=False)\n", "\n", "data = [\n", " go.Bar(\n", " x=fairsharing_subjects.index,\n", " y=fairsharing_subjects['url']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Fairsharing subject coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "United States", "United Kingdom", "Germany", "France", "Switzerland", "China", "Netherlands", "Italy", "Canada", "Belgium", "Spain", "Japan", "Sweden", "Czech Republic", "Norway", "Denmark", "European Union", "Austria", "Finland", "Republic of Ireland", "Australia", "Israel", "Portugal", "Hungary", "Greece", "Malta", "Lithuania", "Slovakia", "Iceland", "Luxembourg", "Montenegro", "Croatia", "Worldwide", "India", "Poland", "Singapore", "South Korea", "Russia", "South Africa", "Taiwan", "Brazil", "New Zealand", "Mexico", "Saudi Arabia", "Bulgaria", "Hong Kong", "Argentina", "Turkey", "Cyprus", "Morocco", "Uganda", "Estonia", "Romania", "Thailand", "Pakistan", "Costa Rica", "Uruguay", "United Arab Emirates", "Togo", "Antarctica", "Panama", "Honduras", "Benin", "Cameroon", "Chile", "Colombia", "Egypt", "El Salvador", "Ethiopia", "Faroe Islands", "Greenland", "Indonesia", "Nigeria", "Kenya", "Latvia", "Madagascar", "Malawi", "Mali", "Mauritania", "Mozambique", "Nicaragua", "Niger", "Zimbabwe" ], "y": [ 686, 248, 192, 162, 114, 99, 96, 91, 86, 83, 83, 80, 76, 71, 69, 67, 66, 64, 63, 62, 62, 61, 60, 59, 58, 53, 52, 52, 52, 52, 51, 51, 49, 32, 11, 10, 10, 9, 9, 8, 8, 8, 8, 6, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Fairsharing country coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fairsharing_countries = fairsharing_df.explode('countries').groupby('countries')[['url']].count().sort_values('url', ascending=False)\n", "\n", "data = [\n", " go.Bar(\n", " x=fairsharing_countries.index,\n", " y=fairsharing_countries['url']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='Fairsharing country coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# re3data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
410|re3data_____::3f2e20af26ead0432f5470d8b739638dhttp://planttfdb.cbi.pku.edu.cn/Plant Transcription Factor DatabasePlantTFDBNaN0.00.0['Life Sciences', 'Basic Biological and Medica...
710|re3data_____::e1db3f9d2fa6c8d8067bc471ab50bdfchttps://spdf.gsfc.nasa.gov/Space Physics Data FacilityNASA's Space Physics Data Facility SPDFNaN0.00.0['Natural Sciences', 'Astrophysics and Astrono...
1310|re3data_____::59521daca59ac29b811343cc4cd370cfhttp://card.westgis.ac.cn/Cold and Arid Regions Science Data Center at L...CARD WDC for Glaciology and Geocryology World ...NaN0.00.0['Natural Sciences', 'Geosciences (including G...
1410|re3data_____::ec1ba1674c852466c266acb64c618d15https://www.psycharchives.org/PsycharchivesNaNNaN0.00.0['Humanities and Social Sciences', 'Psychology...
1910|re3data_____::2ada591fb1bc9aee72a6d3e0c1ae8a76https://www.ihfc-iugg.org/products/global-heat...The Global Heat Flow Database of the Internati...International Heat-flow DatabaseNaN0.00.0['Natural Sciences', 'Geology and Palaeontolog...
\n", "
" ], "text/plain": [ " id \\\n", "4 10|re3data_____::3f2e20af26ead0432f5470d8b739638d \n", "7 10|re3data_____::e1db3f9d2fa6c8d8067bc471ab50bdfc \n", "13 10|re3data_____::59521daca59ac29b811343cc4cd370cf \n", "14 10|re3data_____::ec1ba1674c852466c266acb64c618d15 \n", "19 10|re3data_____::2ada591fb1bc9aee72a6d3e0c1ae8a76 \n", "\n", " url \\\n", "4 http://planttfdb.cbi.pku.edu.cn/ \n", "7 https://spdf.gsfc.nasa.gov/ \n", "13 http://card.westgis.ac.cn/ \n", "14 https://www.psycharchives.org/ \n", "19 https://www.ihfc-iugg.org/products/global-heat... \n", "\n", " official_name \\\n", "4 Plant Transcription Factor Database \n", "7 Space Physics Data Facility \n", "13 Cold and Arid Regions Science Data Center at L... \n", "14 Psycharchives \n", "19 The Global Heat Flow Database of the Internati... \n", "\n", " english_name description latitude \\\n", "4 PlantTFDB NaN 0.0 \n", "7 NASA's Space Physics Data Facility SPDF NaN 0.0 \n", "13 CARD WDC for Glaciology and Geocryology World ... NaN 0.0 \n", "14 NaN NaN 0.0 \n", "19 International Heat-flow Database NaN 0.0 \n", "\n", " longitude subjects \n", "4 0.0 ['Life Sciences', 'Basic Biological and Medica... \n", "7 0.0 ['Natural Sciences', 'Astrophysics and Astrono... \n", "13 0.0 ['Natural Sciences', 'Geosciences (including G... \n", "14 0.0 ['Humanities and Social Sciences', 'Psychology... \n", "19 0.0 ['Natural Sciences', 'Geology and Palaeontolog... " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df = pd.read_csv('../data/raw/re3data_opendoar.csv')\n", "re3data_df = re3data_df[re3data_df.id.str.contains('re3data')]\n", "re3data_df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "re3data_df.loc[(re3data_df.latitude == 0.0) & (re3data_df.longitude == 0.0), ['latitude', 'longitude']] = [np.nan, np.nan]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4 ['Life Sciences', 'Basic Biological and Medica...\n", "7 ['Natural Sciences', 'Astrophysics and Astrono...\n", "13 ['Natural Sciences', 'Geosciences (including G...\n", "14 ['Humanities and Social Sciences', 'Psychology...\n", "19 ['Natural Sciences', 'Geology and Palaeontolog...\n", " ... \n", "8693 ['Life Sciences', 'Basic Biological and Medica...\n", "8695 ['Natural Sciences', 'Atmospheric Science and ...\n", "8697 ['Natural Sciences', 'Atmospheric Science and ...\n", "8699 ['Natural Sciences', 'Atmospheric Science and ...\n", "8705 ['Life Sciences', 'Plant Sciences', 'Plant Gen...\n", "Name: subjects, Length: 2693, dtype: object" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df.subjects" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "re3data_df['subjects'] = re3data_df.subjects.apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def merge_lists(lists):\n", " res = []\n", " for l in lists:\n", " res = res + l\n", " return res\n", "\n", "re3data_cleaned_subjects = re3data_df.explode('subjects').subjects.str.split(',| and ', expand=True)\\\n", " .apply(lambda row: row.dropna().tolist(), axis=1)\\\n", " .reset_index()\\\n", " .groupby('index')[0].apply(lambda x: merge_lists(x))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index\n", "4 [Life Sciences, Basic Biological, Medical Rese...\n", "7 [Natural Sciences, Astrophysics, Astronomy, Ph...\n", "13 [Natural Sciences, Geosciences (including Geog...\n", "14 [Humanities, Social Sciences, Psychology, Soci...\n", "19 [Natural Sciences, Geology, Palaeontology, Geo...\n", " ... \n", "8693 [Life Sciences, Basic Biological, Medical Rese...\n", "8695 [Natural Sciences, Atmospheric Science, Oceano...\n", "8697 [Natural Sciences, Atmospheric Science, Oceano...\n", "8699 [Natural Sciences, Atmospheric Science, Oceano...\n", "8705 [Life Sciences, Plant Sciences, Plant Genetics...\n", "Name: 0, Length: 2693, dtype: object" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_cleaned_subjects" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "re3data_df = re3data_df.join(re3data_cleaned_subjects)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "re3data_df.drop(columns=['subjects'], inplace=True)\n", "re3data_df.rename(columns={0:'subjects'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
count2693267326932034385.0000005.0000002693
unique269326612668201038NaNNaN1427
top10|re3data_____::e59f89142e8d47d32523c53a9137f07bhttp://iubio.bio.indiana.edu/IUBio-ArchiveResearch Data RepositoryIUBio Archive is an archive of biology data an...NaNNaN[Humanities, Social Sciences, Life Sciences, N...
freq12221NaNNaN209
meanNaNNaNNaNNaNNaN61.66811336.623678NaN
stdNaNNaNNaNNaNNaN96.98445748.547521NaN
minNaNNaNNaNNaNNaN12.12300012.123000NaN
25%NaNNaNNaNNaNNaN12.12300012.123400NaN
50%NaNNaNNaNNaNNaN12.12340012.123400NaN
75%NaNNaNNaNNaNNaN37.97116323.748590NaN
maxNaNNaNNaNNaNNaN234.000000123.000000NaN
\n", "
" ], "text/plain": [ " id \\\n", "count 2693 \n", "unique 2693 \n", "top 10|re3data_____::e59f89142e8d47d32523c53a9137f07b \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " url official_name \\\n", "count 2673 2693 \n", "unique 2661 2668 \n", "top http://iubio.bio.indiana.edu/ IUBio-Archive \n", "freq 2 2 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " english_name \\\n", "count 2034 \n", "unique 2010 \n", "top Research Data Repository \n", "freq 2 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " description latitude \\\n", "count 38 5.000000 \n", "unique 38 NaN \n", "top IUBio Archive is an archive of biology data an... NaN \n", "freq 1 NaN \n", "mean NaN 61.668113 \n", "std NaN 96.984457 \n", "min NaN 12.123000 \n", "25% NaN 12.123000 \n", "50% NaN 12.123400 \n", "75% NaN 37.971163 \n", "max NaN 234.000000 \n", "\n", " longitude subjects \n", "count 5.000000 2693 \n", "unique NaN 1427 \n", "top NaN [Humanities, Social Sciences, Life Sciences, N... \n", "freq NaN 209 \n", "mean 36.623678 NaN \n", "std 48.547521 NaN \n", "min 12.123000 NaN \n", "25% 12.123400 NaN \n", "50% 12.123400 NaN \n", "75% 23.748590 NaN \n", "max 123.000000 NaN " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "re3data_df.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "Life Sciences", "Natural Sciences", "Humanities", "Social Sciences", "Medicine", "Biology", "Geosciences (including Geography)", "Oceanography", "Atmospheric Science", "Basic Biological", "Medical Research", "Engineering Sciences", "Social", "Behavioural Sciences", "Geodesy", "Geophysics", " Horticulture", " Forestry", "Agriculture", "Veterinary Medicine", "Physics", "Zoology", "Immunology", "Computer Science", "Chemistry", "General Genetics", "Microbiology", " Virology", "Plant Sciences", "Astronomy", "Astrophysics", "Economics", "Bioinformatics", "Theoretical Biology", "Geography", "Water Research", "System Engineering", " Electrical", " Health Services Research", "Public Health", " Social Medicine", "Human Genetics", "Geochemistry", "Crystallography", " Mineralogy", "Developmental Biology", "Empirical Social Research", "Architecture", "Linguistics", " Geoinformatics", " Remote Sensing", " Cartogaphy", " Photogrammetry", "Palaeontology", "Geology", " Quantum Optics", "History", "Optics", " Molecules", "Animal Genetics", " Cell", "Construction Engineering", "Cell Biology", "Neurosciences", "Ecosystem Research", " Biodiversity", "Biochemistry", "Animal Ecology", "Media Studies", "Physics of Atoms", "Plasmas", " Music", "Fine Arts", " Theatre", "Plant Ecology", "Ecosystem Analysis", "Plant Genetics", "Ancient Cultures", "Materials Science", "Cultural Anthropology", "Particles", " Nuclei", "Fields", "Economic", "Statistics", "Econometrics", "Education Sciences", "Epidemiology", " Medical Informatics", " Medical Biometry", "Political Science", "Religious Studies", "Jurisprudence", "Social Policy", " Hydrology", " Integrated Water Resources Management", "Hydrogeology", " Urban Water Management", " Limnology", " Water Chemistry", "Structural Biology", " Social", " Jewish Studies", "Cultures", "Non-European Languages", "Molecular Chemistry", "Mathematics", "Engineering", "Psychology", "Evolution", " Biochemistry", "Genetics of Microorganisms", "Metabolism", "Soil Sciences", "Theoretical Chemistry", "Physical", "Ecology of Agricultural Landscapes", "Analytical Chemistry", " Method Development (Chemistry)", "Condensed Matter Physics", "Pharmacology", "Systems Engineering", "Human Geography", "Literary Studies", "Liquids - Spectroscopy", " Interfaces", "Food Chemistry", "Biophysics", "Basic Forest Research", " Kinetics", "Physical Chemistry of Molecules", " Image", "Language Processing", "Surface Research", "Artificial Intelligence", "Chemical Solid State", "Physical Geography", " Atoms", "Biological Chemistry", " Anthropology", " Plasmas", "Ethnology/Folklore", "Art History", "Thermal Engineering/Process Engineering", "Sociology", "Agricultural Economics", "Theology", "Virology", "Cognitive Neuroscience", "Urbanism", " Transportation", "Infrastructure Planning", "Neuroimaging", "Musicology", " Landscape Planning", " Spatial Planning", "Modern", " Transfusion Medicine", " Oncology", "Medical Physics", "Plant Systematics", "Current History", "Hematology", "Biomedical Technology", " Building", "Medical Microbiology", " Building Design", " Sustainable Building Technology", "Construction History", " Molecular Infection Biology", "Morphology", "Traffic", "Electrical Engineering", "Systematics", "Transport Systems", " Logistics", "Communication Science", "Anatomy", "Pharmacy", "Use of Forest Resources", "Classical Archaeology", "Business Administration", "Inventory Control", "Public Finance", "Occupational Medicine", "Toxicology", "Social History", "Physiology", "Plant Biochemistry", "Training", "Plant Breeding", "Mechanical", "History of Science", "Pediatric", "Research on Teaching", "Process Engineering", "industrial Engineering", " Learning", " Technical Chemistry", "Adolescent Medicine", "Heat Energy Technology", " Legal History", " Legal Theory", "Plant Cultivation", "Radiology", " Fluid Mechanics", " Thermal Machines", "Legal", "Political Philosophy", "Nuclear Medicine", "Systemic Neuroscience", "Microbial Ecology", "Animal Physiology", "Applied Microbiology", " Behaviour", " Computational Neuroscience", "Jewish Studies", "Materials Engineering", "Organic Molecular Chemistry", "General", "Research on Socialization", "Professions", "Public Law", "Educational Institutions", "Software Technology", "Typology", "Basic Veterinary Medical Science", "Medieval History", "Early Modern History", "Surfaces", " Material Characterisation", "Philosophy", "Forensic Medicine", " Historical Linguistics", " Non-European Languages", "Physical Chemistry of Solids", "Pathology", " Soft Matter", "Modelling", " Metabolism", " Biological Physics", "Theory", "Ancient History", "Ancient Near Eastern Studies", "Molecular Neuroscience", "Agricultural", "Neurogenetics", " Nonlinear Dynamics", "Prehistory", "Statistical Physics", "Egyptology", "Criminology", "Inorganic Molecular Chemistry", "Gastroenterology", "Food Process Engineering", "Obstetrics", "Constructive Mechanical Engineering", "Acoustics", "Human Factors", "Gynaecology", "Oceania Studies", "Electrical Energy Generation", " Distribution", " Ergonomics", "Sensory", " Material Synthesis", "Mechanics", "Plant Cell", "Solid State", " Human-Machine Systems", "Biological", "Biomimetic Chemistry", " American", "Surface Chemistry", " Application", "African", "Behavioural Biology", "Therapy", " Methodology", " Medical Psychology", " Semitic Studies", "Plant Nutrition", "Radiobiology", "Clinical Veterinary Medicine", "Operating", "Polymer Research", "General Theoretical Chemistry", " Communication", "Islamic Studies", "Nutritional Sciences", " Breeding", "Radiation Oncology", " Clinical Psychology", "Endocrinology", "Theatre", "Differential Psychology", "Applied Linguistics", " Arabian Studies", " Diagnostics", "Hygiene", "Animal Husbandry", "Information Systems", "Basic Research on Pathogenesis", "Individual Linguistics", " Diabetology", "History of Education", "European", "Developmental", "Asian Studies", "Plant Physiology", "Energy Process Engineering", "Experimental Condensed Matter Physics", " High-Frequency", "General Education", " Industrial", "Protestant Theology", "Network Technology", "Communication", " Theoretical Electrical Engineering", "American Literature", " Hydraulic Engineering", "Clinical Neurosciences III - Ophthalmology", "Geotechnics", "Social Psychology", "Pathobiochemistry", "Geriatric Medicine", "Educational Psychology", "Organisational Psychology", "Clinical Chemistry", "Sociological Theory", "Gerontology", "Rheumatology", " Allergology", "Dentistry", " Construction Operation", "Dermatology", " Geosciences (including Geography)", "Sructural Engineering", " Control Systems", "Sintered Metallic", " Clinical Immunology", "Roman Catholic Theology", "Reproductive Medicine/Biology", " Clinical Infectiology Intensive Care Medicine", " Building Informatics", "Geosciences (including Geography) ", " Atmospheric Science", "Automation", "Veterinary Medicine ", "Mathematical Psychology", "Biological Process Engineering", " Biological", "Preparatory", "Thermodynamics", " Angiology", " Robotics", " Oral Surgery", "Comparative Literature", "Atmospheric Science ", "Biomaterials", "Law of Criminal Procedure", "Physical Chemistry of Polymers", "Kinetics of Materials", "Cardiology", " Mechatronics", "Criminal Law", "Cultural Studies", "Pneumology", "Polymer Materials", "Ceramic Materials", "Technical Thermodynamics", "Theoretical Condensed Matter Physics", "Urology", "Electronic Semiconductors", " Circuits", "Traumatology", " Chemistry", "Theoretical Computer Science", "Technical Chemistry", "Thermal Process Engineering", " Building Physics", "Theoretical Physics of Polymers", "Thermal Processes", "Inter-organismic Interactions of Plants", "Economic Theory", " Thermomechanical Treatment of Materials", "Cellular Neuroscience", "Life Sciences ", "Biological Psychiatry", "Measurement Systems", "Medical Research ", "Medieval German Literature", "Chemical", "Metallurgical", "Classical Philology", "Microstructural Mechanical Properties of Materials", "Clinical Neurosciences I - Neurology", "History of Philosophy", "Orthopaedics", " Components", " Systems", "Comparative Neurobiology", "Composite Materials", " Neurosurgery", "Construction Material Sciences", " Life Sciences ", "Cardiothoracic Surgery", "Private Law", " General Genetics", "Experimental", "Social Sciences ", "Developmental Neurobiology", " Agriculture" ], "y": [ 1440, 1325, 1238, 1222, 1014, 882, 760, 581, 535, 514, 513, 496, 451, 432, 360, 326, 317, 317, 316, 315, 308, 239, 234, 227, 224, 220, 212, 212, 210, 205, 205, 204, 175, 175, 159, 143, 142, 142, 131, 131, 131, 117, 108, 108, 108, 104, 103, 101, 101, 99, 99, 99, 99, 98, 98, 96, 96, 96, 96, 94, 94, 86, 85, 81, 79, 79, 79, 79, 78, 75, 75, 74, 74, 74, 67, 67, 65, 65, 64, 63, 63, 63, 63, 62, 62, 62, 61, 57, 57, 57, 56, 52, 50, 50, 48, 48, 48, 48, 48, 48, 45, 44, 44, 44, 44, 42, 41, 40, 38, 37, 36, 36, 36, 34, 33, 33, 31, 27, 27, 26, 25, 25, 24, 24, 23, 23, 23, 23, 23, 23, 23, 22, 22, 22, 22, 22, 22, 21, 21, 21, 21, 19, 19, 18, 18, 18, 18, 17, 17, 17, 17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, 16, 16, 15, 15, 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "re3data subject coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "re3data_subjects = re3data_df.explode('subjects').groupby('subjects')[['url']].count().sort_values('url', ascending=False)\n", "\n", "data = [\n", " go.Bar(\n", " x=re3data_subjects.index,\n", " y=re3data_subjects['url']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='re3data subject coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# OpenDOAR" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
010|opendoar____::e833e042f509c996b1b25324d56659fbhttp://www.bilbao.net/bldBLD - Bilboko Liburutegi DigitalaBLD - Bilboko Liburutegi DigitalaBLD is a repository of digital documents, desi...43.256699-2.924100[]
110|opendoar____::f621585df244e9596dc70a39b579efb1https://researchdirect.westernsydney.edu.au/Western Sydney ResearchDirectWestern Sydney ResearchDirectNaN0.0000000.000000[]
210|opendoar____::437d7d1d97917cd627a34a6a0fb41136http://redress.lancs.ac.uk/Learning_Space/Learning Space CatalogueNaNThis repository is a Social Science e-Science ...54.010760-2.784990['Social Sciences General', 'Science General',...
310|opendoar____::d840cc5d906c3e9c84374c8919d2074ehttp://digitallibrary.usc.edu/search/controlle...USC Digital LibraryUSC Digital LibraryThis is an institutional repository providing ...34.052200-118.242996[]
510|opendoar____::4ba3c163cd1efd4c14e3a415fa0a3010http://www.ufgd.edu.br:8080/jspui/Repositório de Divulgação das Produções Cientí...Repositório de Divulgação das Produções Cientí...This site provides access to the research outp...-22.221800-54.806400[]
\n", "
" ], "text/plain": [ " id \\\n", "0 10|opendoar____::e833e042f509c996b1b25324d56659fb \n", "1 10|opendoar____::f621585df244e9596dc70a39b579efb1 \n", "2 10|opendoar____::437d7d1d97917cd627a34a6a0fb41136 \n", "3 10|opendoar____::d840cc5d906c3e9c84374c8919d2074e \n", "5 10|opendoar____::4ba3c163cd1efd4c14e3a415fa0a3010 \n", "\n", " url \\\n", "0 http://www.bilbao.net/bld \n", "1 https://researchdirect.westernsydney.edu.au/ \n", "2 http://redress.lancs.ac.uk/Learning_Space/ \n", "3 http://digitallibrary.usc.edu/search/controlle... \n", "5 http://www.ufgd.edu.br:8080/jspui/ \n", "\n", " official_name \\\n", "0 BLD - Bilboko Liburutegi Digitala \n", "1 Western Sydney ResearchDirect \n", "2 Learning Space Catalogue \n", "3 USC Digital Library \n", "5 Repositório de Divulgação das Produções Cientí... \n", "\n", " english_name \\\n", "0 BLD - Bilboko Liburutegi Digitala \n", "1 Western Sydney ResearchDirect \n", "2 NaN \n", "3 USC Digital Library \n", "5 Repositório de Divulgação das Produções Cientí... \n", "\n", " description latitude longitude \\\n", "0 BLD is a repository of digital documents, desi... 43.256699 -2.924100 \n", "1 NaN 0.000000 0.000000 \n", "2 This repository is a Social Science e-Science ... 54.010760 -2.784990 \n", "3 This is an institutional repository providing ... 34.052200 -118.242996 \n", "5 This site provides access to the research outp... -22.221800 -54.806400 \n", "\n", " subjects \n", "0 [] \n", "1 [] \n", "2 ['Social Sciences General', 'Science General',... \n", "3 [] \n", "5 [] " ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df = pd.read_csv('../data/raw/re3data_opendoar.csv')\n", "opendoar_df = opendoar_df[opendoar_df.id.str.contains('opendoar')]\n", "opendoar_df.head()" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 []\n", "1 []\n", "2 ['Social Sciences General', 'Science General',...\n", "3 []\n", "5 []\n", " ... \n", "8701 ['Multidisciplinary']\n", "8702 []\n", "8703 ['Business and Economics']\n", "8704 ['Earth and Planetary Sciences', 'Ecology and ...\n", "8706 []\n", "Name: subjects, Length: 6014, dtype: object" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df.subjects" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [], "source": [ "opendoar_df['subjects'] = opendoar_df.subjects.apply(lambda x: ast.literal_eval(x))" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [], "source": [ "opendoar_cleaned_subjects = opendoar_df.explode('subjects').subjects.str.split(',| and ', expand=True)\\\n", " .apply(lambda row: row.dropna().tolist(), axis=1)\\\n", " .reset_index()\\\n", " .groupby('index')[0].apply(lambda x: merge_lists(x))" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "index\n", "0 []\n", "1 []\n", "2 [Social Sciences General, Science General, Com...\n", "3 []\n", "5 []\n", " ... \n", "8701 [Multidisciplinary]\n", "8702 []\n", "8703 [Business, Economics]\n", "8704 [Earth, Planetary Sciences, Ecology, Environme...\n", "8706 []\n", "Name: 0, Length: 6014, dtype: object" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_cleaned_subjects" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [], "source": [ "opendoar_df = opendoar_df.join(opendoar_cleaned_subjects)" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "opendoar_df.drop(columns=['subjects'], inplace=True)\n", "opendoar_df.rename(columns={0: 'subjects'}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idurlofficial_nameenglish_namedescriptionlatitudelongitudesubjects
count601460136014550057766014.0000006014.0000006014
unique60145953594654134920NaNNaN201
top10|opendoar____::a2557a7b2e94197ff767970b67041697http://harp.lib.hiroshima-u.ac.jp/Hiroshima Associated Repository PortalAURAThis site provides access to the research outp...NaNNaN[]
freq133498NaNNaN5273
meanNaNNaNNaNNaNNaN38.6493937.810948NaN
stdNaNNaNNaNNaNNaN788.40617371.689788NaN
minNaNNaNNaNNaNNaN-79.029999-683.103027NaN
25%NaNNaNNaNNaNNaN4.644632-49.273300NaN
50%NaNNaNNaNNaNNaN37.9304494.788870NaN
75%NaNNaNNaNNaNNaN47.29440030.685501NaN
maxNaNNaNNaNNaNNaN61138.800781178.438995NaN
\n", "
" ], "text/plain": [ " id \\\n", "count 6014 \n", "unique 6014 \n", "top 10|opendoar____::a2557a7b2e94197ff767970b67041697 \n", "freq 1 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " url \\\n", "count 6013 \n", "unique 5953 \n", "top http://harp.lib.hiroshima-u.ac.jp/ \n", "freq 3 \n", "mean NaN \n", "std NaN \n", "min NaN \n", "25% NaN \n", "50% NaN \n", "75% NaN \n", "max NaN \n", "\n", " official_name english_name \\\n", "count 6014 5500 \n", "unique 5946 5413 \n", "top Hiroshima Associated Repository Portal AURA \n", "freq 3 4 \n", "mean NaN NaN \n", "std NaN NaN \n", "min NaN NaN \n", "25% NaN NaN \n", "50% NaN NaN \n", "75% NaN NaN \n", "max NaN NaN \n", "\n", " description latitude \\\n", "count 5776 6014.000000 \n", "unique 4920 NaN \n", "top This site provides access to the research outp... NaN \n", "freq 98 NaN \n", "mean NaN 38.649393 \n", "std NaN 788.406173 \n", "min NaN -79.029999 \n", "25% NaN 4.644632 \n", "50% NaN 37.930449 \n", "75% NaN 47.294400 \n", "max NaN 61138.800781 \n", "\n", " longitude subjects \n", "count 6014.000000 6014 \n", "unique NaN 201 \n", "top NaN [] \n", "freq NaN 5273 \n", "mean 7.810948 NaN \n", "std 71.689788 NaN \n", "min -683.103027 NaN \n", "25% -49.273300 NaN \n", "50% 4.788870 NaN \n", "75% 30.685501 NaN \n", "max 178.438995 NaN " ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "opendoar_df.describe(include='all')" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "Multidisciplinary", "Medicine", "Health", "Science General", "Technology General", "Economics", "Business", "Archaeology", "Social Sciences General", "History", "Politics", "Environment", "Ecology", "Law", "Computers", "IT", "Biology", "Biochemistry", "Information Science", "Library", "Humanities General", "Arts", "Education", " Food", "Agriculture", "Statistics", "Mathematics", "Literature", "Veterinary", "Astronomy", "Physics", "Geography", "Regional Studies", "Language", "Religion", "Chemical Technology", "Chemistry", "Philosophy", "Fine", "Performing Arts", "Planning", "Psychology", "Management", "Planetary Sciences", "Earth", "Electrical", "Electronic Engineering", "Architecture", "Civil Engineering", "Mechanical Engineering", "Materials", " History", " Philosophy", " Health", "Social Sciences General ", " Language", " Technology General", " Law", "Performing Arts ", " Science General", "Medicine ", "IT ", "Veterinary " ], "y": [ 466, 67, 66, 63, 53, 52, 52, 49, 48, 47, 44, 44, 44, 43, 43, 42, 40, 40, 36, 36, 35, 35, 32, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 29, 27, 27, 27, 25, 23, 22, 17, 17, 17, 16, 16, 12, 12, 12, 8, 7, 7, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "OpenDOAR subject coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "opendoar_subjects = opendoar_df.explode('subjects').groupby('subjects')[['url']].count().sort_values('url', ascending=False)\n", "\n", "data = [\n", " go.Bar(\n", " x=opendoar_subjects.index,\n", " y=opendoar_subjects['url']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='OpenDOAR subject coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
latlonnameadmin1admin2cc
043.26271-2.92528BilbaoBasque CountryBizkaiaES
14.88447-1.75536TakoradiWesternGH
253.98333-2.78333GalgateEnglandLancashireGB
334.05223-118.24368Los AngelesCaliforniaLos Angeles CountyUS
4-22.22111-54.80556DouradosMato Grosso do SulDouradosBR
.....................
600940.8563114.24641NapoliCampaniaProvincia di NapoliIT
601038.1939415.55256MessinaSicilyMessinaIT
601154.3213310.13489KielSchleswig-HolsteinDE
601243.40785-73.25955GranvilleNew YorkWashington CountyUS
601333.96095-83.37794AthensGeorgiaClarke CountyUS
\n", "

6014 rows × 6 columns

\n", "
" ], "text/plain": [ " lat lon name admin1 \\\n", "0 43.26271 -2.92528 Bilbao Basque Country \n", "1 4.88447 -1.75536 Takoradi Western \n", "2 53.98333 -2.78333 Galgate England \n", "3 34.05223 -118.24368 Los Angeles California \n", "4 -22.22111 -54.80556 Dourados Mato Grosso do Sul \n", "... ... ... ... ... \n", "6009 40.85631 14.24641 Napoli Campania \n", "6010 38.19394 15.55256 Messina Sicily \n", "6011 54.32133 10.13489 Kiel Schleswig-Holstein \n", "6012 43.40785 -73.25955 Granville New York \n", "6013 33.96095 -83.37794 Athens Georgia \n", "\n", " admin2 cc \n", "0 Bizkaia ES \n", "1 GH \n", "2 Lancashire GB \n", "3 Los Angeles County US \n", "4 Dourados BR \n", "... ... .. \n", "6009 Provincia di Napoli IT \n", "6010 Messina IT \n", "6011 DE \n", "6012 Washington County US \n", "6013 Clarke County US \n", "\n", "[6014 rows x 6 columns]" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "reverse_geocoding = pd.DataFrame(rg.search(opendoar_df[['latitude', 'longitude']].apply(tuple, axis=1).tolist()))\n", "reverse_geocoding['lat'] = reverse_geocoding['lat'].astype('float')\n", "reverse_geocoding['lon'] = reverse_geocoding['lon'].astype('float')\n", "reverse_geocoding" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [ "opendoar_df = opendoar_df.join(reverse_geocoding[['cc']])" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [], "source": [ "opendoar_df.loc[(opendoar_df.latitude == 0.0) & (opendoar_df.longitude == 0.0), ['latitude', 'longitude', 'cc']] = [np.nan, np.nan, np.nan]\n" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "linkText": "Export to plot.ly", "plotlyServerURL": "https://plot.ly", "showLink": false }, "data": [ { "type": "bar", "x": [ "US", "GH", "JP", "GB", "DE", "ES", "BR", "PE", "TR", "FR", "HR", "ID", "IT", "PL", "CA", "AU", "IN", "UA", "NO", "CO", "NL", "AR", "CN", "PT", "TW", "CH", "SE", "GR", "ZA", "MX", "AT", "HU", "BY", "BE", "RS", "KE", "EC", "FI", "NG", "RU", "IE", "KR", "MY", "CZ", "IR", "VE", "CL", "EG", "LT", "BD", "LK", "DK", "NZ", "TZ", "UG", "DZ", "SA", "NI", "MD", "SD", "ZW", "SI", "CU", "KZ", "HK", "TH", "JM", "EE", "SV", "UY", "MK", "PH", "PS", "BW", "PK", "BO", "SN", "DO", "LB", "LV", "FJ", "NA", "SG", "BG", "YE", "SJ", "LY", "RO", "PY", "MN", "CR", "IL", "TN", "PA", "MZ", "CY", "TT", "XK", "VA", "VN", "SY", "ZM", "AE", "RW", "IS", "AM", "AZ", "BN", "CM", "CV", "ET", "HN", "IQ", "KG", "PR", "LA", "AL", "LU", "MO", "MV", "MW", "NC", "NP", "LS" ], "y": [ 541, 410, 345, 191, 174, 104, 100, 83, 82, 79, 79, 77, 76, 69, 67, 64, 63, 61, 58, 54, 47, 43, 38, 37, 36, 36, 35, 29, 27, 27, 26, 26, 25, 25, 22, 20, 20, 20, 20, 18, 18, 17, 16, 14, 13, 12, 11, 10, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 7, 7, 7, 6, 6, 6, 6, 6, 6, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ] } ], "layout": { "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "OpenDOAR country coverage" }, "xaxis": { "tickangle": 45, "tickfont": { "size": 12 } } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "opendoar_countries = opendoar_df.groupby('cc')[['id']].count().sort_values('id', ascending=False)\n", "\n", "data = [\n", " go.Bar(\n", " x=opendoar_countries.index,\n", " y=opendoar_countries['id']\n", " )\n", "]\n", "\n", "layout = go.Layout(\n", " title='OpenDOAR country coverage',\n", " xaxis=dict(tickangle=45, tickfont=dict(size=12))\n", ")\n", "fig = go.Figure(data=data, layout=layout)\n", "plotly.offline.iplot(fig)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }