first import
This commit is contained in:
commit
dcb8dbc4bd
|
@ -0,0 +1,89 @@
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# DotEnv configuration
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Database
|
||||||
|
*.db
|
||||||
|
*.rdb
|
||||||
|
|
||||||
|
# Pycharm
|
||||||
|
.idea
|
||||||
|
|
||||||
|
# VS Code
|
||||||
|
.vscode/
|
||||||
|
|
||||||
|
# Spyder
|
||||||
|
.spyproject/
|
||||||
|
|
||||||
|
# Jupyter NB Checkpoints
|
||||||
|
.ipynb_checkpoints/
|
||||||
|
|
||||||
|
# exclude data from source control by default
|
||||||
|
/data/
|
||||||
|
|
||||||
|
# Mac OS-specific storage files
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
|
# vim
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
|
||||||
|
# Mypy cache
|
||||||
|
.mypy_cache/
|
|
@ -0,0 +1,10 @@
|
||||||
|
|
||||||
|
The MIT License (MIT)
|
||||||
|
Copyright (c) 2021, Andrea Mannocci
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
|
@ -0,0 +1,144 @@
|
||||||
|
.PHONY: clean data lint requirements sync_data_to_s3 sync_data_from_s3
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# GLOBALS #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
PROJECT_DIR := $(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))
|
||||||
|
BUCKET = [OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')
|
||||||
|
PROFILE = default
|
||||||
|
PROJECT_NAME = data-registries
|
||||||
|
PYTHON_INTERPRETER = python3
|
||||||
|
|
||||||
|
ifeq (,$(shell which conda))
|
||||||
|
HAS_CONDA=False
|
||||||
|
else
|
||||||
|
HAS_CONDA=True
|
||||||
|
endif
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# COMMANDS #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
## Install Python Dependencies
|
||||||
|
requirements: test_environment
|
||||||
|
$(PYTHON_INTERPRETER) -m pip install -U pip setuptools wheel
|
||||||
|
$(PYTHON_INTERPRETER) -m pip install -r requirements.txt
|
||||||
|
|
||||||
|
## Make Dataset
|
||||||
|
data: requirements
|
||||||
|
$(PYTHON_INTERPRETER) src/data/make_dataset.py data/raw data/processed
|
||||||
|
|
||||||
|
## Delete all compiled Python files
|
||||||
|
clean:
|
||||||
|
find . -type f -name "*.py[co]" -delete
|
||||||
|
find . -type d -name "__pycache__" -delete
|
||||||
|
|
||||||
|
## Lint using flake8
|
||||||
|
lint:
|
||||||
|
flake8 src
|
||||||
|
|
||||||
|
## Upload Data to S3
|
||||||
|
sync_data_to_s3:
|
||||||
|
ifeq (default,$(PROFILE))
|
||||||
|
aws s3 sync data/ s3://$(BUCKET)/data/
|
||||||
|
else
|
||||||
|
aws s3 sync data/ s3://$(BUCKET)/data/ --profile $(PROFILE)
|
||||||
|
endif
|
||||||
|
|
||||||
|
## Download Data from S3
|
||||||
|
sync_data_from_s3:
|
||||||
|
ifeq (default,$(PROFILE))
|
||||||
|
aws s3 sync s3://$(BUCKET)/data/ data/
|
||||||
|
else
|
||||||
|
aws s3 sync s3://$(BUCKET)/data/ data/ --profile $(PROFILE)
|
||||||
|
endif
|
||||||
|
|
||||||
|
## Set up python interpreter environment
|
||||||
|
create_environment:
|
||||||
|
ifeq (True,$(HAS_CONDA))
|
||||||
|
@echo ">>> Detected conda, creating conda environment."
|
||||||
|
ifeq (3,$(findstring 3,$(PYTHON_INTERPRETER)))
|
||||||
|
conda create --name $(PROJECT_NAME) python=3
|
||||||
|
else
|
||||||
|
conda create --name $(PROJECT_NAME) python=2.7
|
||||||
|
endif
|
||||||
|
@echo ">>> New conda env created. Activate with:\nsource activate $(PROJECT_NAME)"
|
||||||
|
else
|
||||||
|
$(PYTHON_INTERPRETER) -m pip install -q virtualenv virtualenvwrapper
|
||||||
|
@echo ">>> Installing virtualenvwrapper if not already installed.\nMake sure the following lines are in shell startup file\n\
|
||||||
|
export WORKON_HOME=$$HOME/.virtualenvs\nexport PROJECT_HOME=$$HOME/Devel\nsource /usr/local/bin/virtualenvwrapper.sh\n"
|
||||||
|
@bash -c "source `which virtualenvwrapper.sh`;mkvirtualenv $(PROJECT_NAME) --python=$(PYTHON_INTERPRETER)"
|
||||||
|
@echo ">>> New virtualenv created. Activate with:\nworkon $(PROJECT_NAME)"
|
||||||
|
endif
|
||||||
|
|
||||||
|
## Test python environment is setup correctly
|
||||||
|
test_environment:
|
||||||
|
$(PYTHON_INTERPRETER) test_environment.py
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# PROJECT RULES #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#################################################################################
|
||||||
|
# Self Documenting Commands #
|
||||||
|
#################################################################################
|
||||||
|
|
||||||
|
.DEFAULT_GOAL := help
|
||||||
|
|
||||||
|
# Inspired by <http://marmelab.com/blog/2016/02/29/auto-documented-makefile.html>
|
||||||
|
# sed script explained:
|
||||||
|
# /^##/:
|
||||||
|
# * save line in hold space
|
||||||
|
# * purge line
|
||||||
|
# * Loop:
|
||||||
|
# * append newline + line to hold space
|
||||||
|
# * go to next line
|
||||||
|
# * if line starts with doc comment, strip comment character off and loop
|
||||||
|
# * remove target prerequisites
|
||||||
|
# * append hold space (+ newline) to line
|
||||||
|
# * replace newline plus comments by `---`
|
||||||
|
# * print line
|
||||||
|
# Separate expressions are necessary because labels cannot be delimited by
|
||||||
|
# semicolon; see <http://stackoverflow.com/a/11799865/1968>
|
||||||
|
.PHONY: help
|
||||||
|
help:
|
||||||
|
@echo "$$(tput bold)Available rules:$$(tput sgr0)"
|
||||||
|
@echo
|
||||||
|
@sed -n -e "/^## / { \
|
||||||
|
h; \
|
||||||
|
s/.*//; \
|
||||||
|
:doc" \
|
||||||
|
-e "H; \
|
||||||
|
n; \
|
||||||
|
s/^## //; \
|
||||||
|
t doc" \
|
||||||
|
-e "s/:.*//; \
|
||||||
|
G; \
|
||||||
|
s/\\n## /---/; \
|
||||||
|
s/\\n/ /g; \
|
||||||
|
p; \
|
||||||
|
}" ${MAKEFILE_LIST} \
|
||||||
|
| LC_ALL='C' sort --ignore-case \
|
||||||
|
| awk -F '---' \
|
||||||
|
-v ncol=$$(tput cols) \
|
||||||
|
-v indent=19 \
|
||||||
|
-v col_on="$$(tput setaf 6)" \
|
||||||
|
-v col_off="$$(tput sgr0)" \
|
||||||
|
'{ \
|
||||||
|
printf "%s%*s%s ", col_on, -indent, $$1, col_off; \
|
||||||
|
n = split($$2, words, " "); \
|
||||||
|
line_length = ncol - indent; \
|
||||||
|
for (i = 1; i <= n; i++) { \
|
||||||
|
line_length -= length(words[i]) + 1; \
|
||||||
|
if (line_length <= 0) { \
|
||||||
|
line_length = ncol - indent - length(words[i]) - 1; \
|
||||||
|
printf "\n%*s ", -indent, " "; \
|
||||||
|
} \
|
||||||
|
printf "%s ", words[i]; \
|
||||||
|
} \
|
||||||
|
printf "\n"; \
|
||||||
|
}' \
|
||||||
|
| more $(shell test $(shell uname) = Darwin && echo '--no-init --raw-control-chars')
|
|
@ -0,0 +1,57 @@
|
||||||
|
data-registries
|
||||||
|
==============================
|
||||||
|
|
||||||
|
A short description of the project.
|
||||||
|
|
||||||
|
Project Organization
|
||||||
|
------------
|
||||||
|
|
||||||
|
├── LICENSE
|
||||||
|
├── Makefile <- Makefile with commands like `make data` or `make train`
|
||||||
|
├── README.md <- The top-level README for developers using this project.
|
||||||
|
├── data
|
||||||
|
│ ├── external <- Data from third party sources.
|
||||||
|
│ ├── interim <- Intermediate data that has been transformed.
|
||||||
|
│ ├── processed <- The final, canonical data sets for modeling.
|
||||||
|
│ └── raw <- The original, immutable data dump.
|
||||||
|
│
|
||||||
|
├── docs <- A default Sphinx project; see sphinx-doc.org for details
|
||||||
|
│
|
||||||
|
├── models <- Trained and serialized models, model predictions, or model summaries
|
||||||
|
│
|
||||||
|
├── notebooks <- Jupyter notebooks. Naming convention is a number (for ordering),
|
||||||
|
│ the creator's initials, and a short `-` delimited description, e.g.
|
||||||
|
│ `1.0-jqp-initial-data-exploration`.
|
||||||
|
│
|
||||||
|
├── references <- Data dictionaries, manuals, and all other explanatory materials.
|
||||||
|
│
|
||||||
|
├── reports <- Generated analysis as HTML, PDF, LaTeX, etc.
|
||||||
|
│ └── figures <- Generated graphics and figures to be used in reporting
|
||||||
|
│
|
||||||
|
├── requirements.txt <- The requirements file for reproducing the analysis environment, e.g.
|
||||||
|
│ generated with `pip freeze > requirements.txt`
|
||||||
|
│
|
||||||
|
├── setup.py <- makes project pip installable (pip install -e .) so src can be imported
|
||||||
|
├── src <- Source code for use in this project.
|
||||||
|
│ ├── __init__.py <- Makes src a Python module
|
||||||
|
│ │
|
||||||
|
│ ├── data <- Scripts to download or generate data
|
||||||
|
│ │ └── make_dataset.py
|
||||||
|
│ │
|
||||||
|
│ ├── features <- Scripts to turn raw data into features for modeling
|
||||||
|
│ │ └── build_features.py
|
||||||
|
│ │
|
||||||
|
│ ├── models <- Scripts to train models and then use trained models to make
|
||||||
|
│ │ │ predictions
|
||||||
|
│ │ ├── predict_model.py
|
||||||
|
│ │ └── train_model.py
|
||||||
|
│ │
|
||||||
|
│ └── visualization <- Scripts to create exploratory and results oriented visualizations
|
||||||
|
│ └── visualize.py
|
||||||
|
│
|
||||||
|
└── tox.ini <- tox file with settings for running tox; see tox.readthedocs.io
|
||||||
|
|
||||||
|
|
||||||
|
--------
|
||||||
|
|
||||||
|
<p><small>Project based on the <a target="_blank" href="https://drivendata.github.io/cookiecutter-data-science/">cookiecutter data science project template</a>. #cookiecutterdatascience</small></p>
|
|
@ -0,0 +1,153 @@
|
||||||
|
# Makefile for Sphinx documentation
|
||||||
|
#
|
||||||
|
|
||||||
|
# You can set these variables from the command line.
|
||||||
|
SPHINXOPTS =
|
||||||
|
SPHINXBUILD = sphinx-build
|
||||||
|
PAPER =
|
||||||
|
BUILDDIR = _build
|
||||||
|
|
||||||
|
# Internal variables.
|
||||||
|
PAPEROPT_a4 = -D latex_paper_size=a4
|
||||||
|
PAPEROPT_letter = -D latex_paper_size=letter
|
||||||
|
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
# the i18n builder cannot share the environment and doctrees with the others
|
||||||
|
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
|
||||||
|
|
||||||
|
.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
|
||||||
|
|
||||||
|
help:
|
||||||
|
@echo "Please use \`make <target>' where <target> is one of"
|
||||||
|
@echo " html to make standalone HTML files"
|
||||||
|
@echo " dirhtml to make HTML files named index.html in directories"
|
||||||
|
@echo " singlehtml to make a single large HTML file"
|
||||||
|
@echo " pickle to make pickle files"
|
||||||
|
@echo " json to make JSON files"
|
||||||
|
@echo " htmlhelp to make HTML files and a HTML help project"
|
||||||
|
@echo " qthelp to make HTML files and a qthelp project"
|
||||||
|
@echo " devhelp to make HTML files and a Devhelp project"
|
||||||
|
@echo " epub to make an epub"
|
||||||
|
@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
|
||||||
|
@echo " latexpdf to make LaTeX files and run them through pdflatex"
|
||||||
|
@echo " text to make text files"
|
||||||
|
@echo " man to make manual pages"
|
||||||
|
@echo " texinfo to make Texinfo files"
|
||||||
|
@echo " info to make Texinfo files and run them through makeinfo"
|
||||||
|
@echo " gettext to make PO message catalogs"
|
||||||
|
@echo " changes to make an overview of all changed/added/deprecated items"
|
||||||
|
@echo " linkcheck to check all external links for integrity"
|
||||||
|
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
|
||||||
|
|
||||||
|
clean:
|
||||||
|
-rm -rf $(BUILDDIR)/*
|
||||||
|
|
||||||
|
html:
|
||||||
|
$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
|
||||||
|
|
||||||
|
dirhtml:
|
||||||
|
$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
|
||||||
|
|
||||||
|
singlehtml:
|
||||||
|
$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
|
||||||
|
|
||||||
|
pickle:
|
||||||
|
$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can process the pickle files."
|
||||||
|
|
||||||
|
json:
|
||||||
|
$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can process the JSON files."
|
||||||
|
|
||||||
|
htmlhelp:
|
||||||
|
$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can run HTML Help Workshop with the" \
|
||||||
|
".hhp project file in $(BUILDDIR)/htmlhelp."
|
||||||
|
|
||||||
|
qthelp:
|
||||||
|
$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; now you can run "qcollectiongenerator" with the" \
|
||||||
|
".qhcp project file in $(BUILDDIR)/qthelp, like this:"
|
||||||
|
@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/data-registries.qhcp"
|
||||||
|
@echo "To view the help file:"
|
||||||
|
@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/data-registries.qhc"
|
||||||
|
|
||||||
|
devhelp:
|
||||||
|
$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
|
||||||
|
@echo
|
||||||
|
@echo "Build finished."
|
||||||
|
@echo "To view the help file:"
|
||||||
|
@echo "# mkdir -p $$HOME/.local/share/devhelp/data-registries"
|
||||||
|
@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/data-registries"
|
||||||
|
@echo "# devhelp"
|
||||||
|
|
||||||
|
epub:
|
||||||
|
$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
|
||||||
|
|
||||||
|
latex:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo
|
||||||
|
@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
|
||||||
|
@echo "Run \`make' in that directory to run these through (pdf)latex" \
|
||||||
|
"(use \`make latexpdf' here to do that automatically)."
|
||||||
|
|
||||||
|
latexpdf:
|
||||||
|
$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
|
||||||
|
@echo "Running LaTeX files through pdflatex..."
|
||||||
|
$(MAKE) -C $(BUILDDIR)/latex all-pdf
|
||||||
|
@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
|
||||||
|
|
||||||
|
text:
|
||||||
|
$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The text files are in $(BUILDDIR)/text."
|
||||||
|
|
||||||
|
man:
|
||||||
|
$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
|
||||||
|
|
||||||
|
texinfo:
|
||||||
|
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
|
||||||
|
@echo "Run \`make' in that directory to run these through makeinfo" \
|
||||||
|
"(use \`make info' here to do that automatically)."
|
||||||
|
|
||||||
|
info:
|
||||||
|
$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
|
||||||
|
@echo "Running Texinfo files through makeinfo..."
|
||||||
|
make -C $(BUILDDIR)/texinfo info
|
||||||
|
@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
|
||||||
|
|
||||||
|
gettext:
|
||||||
|
$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
|
||||||
|
@echo
|
||||||
|
@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
|
||||||
|
|
||||||
|
changes:
|
||||||
|
$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
|
||||||
|
@echo
|
||||||
|
@echo "The overview file is in $(BUILDDIR)/changes."
|
||||||
|
|
||||||
|
linkcheck:
|
||||||
|
$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
|
||||||
|
@echo
|
||||||
|
@echo "Link check complete; look for any errors in the above output " \
|
||||||
|
"or in $(BUILDDIR)/linkcheck/output.txt."
|
||||||
|
|
||||||
|
doctest:
|
||||||
|
$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
|
||||||
|
@echo "Testing of doctests in the sources finished, look at the " \
|
||||||
|
"results in $(BUILDDIR)/doctest/output.txt."
|
|
@ -0,0 +1,10 @@
|
||||||
|
Commands
|
||||||
|
========
|
||||||
|
|
||||||
|
The Makefile contains the central entry points for common tasks related to this project.
|
||||||
|
|
||||||
|
Syncing data to S3
|
||||||
|
^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
* `make sync_data_to_s3` will use `aws s3 sync` to recursively sync files in `data/` up to `s3://[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')/data/`.
|
||||||
|
* `make sync_data_from_s3` will use `aws s3 sync` to recursively sync files from `s3://[OPTIONAL] your-bucket-for-syncing-data (do not include 's3://')/data/` to `data/`.
|
|
@ -0,0 +1,244 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
#
|
||||||
|
# data-registries documentation build configuration file, created by
|
||||||
|
# sphinx-quickstart.
|
||||||
|
#
|
||||||
|
# This file is execfile()d with the current directory set to its containing dir.
|
||||||
|
#
|
||||||
|
# Note that not all possible configuration values are present in this
|
||||||
|
# autogenerated file.
|
||||||
|
#
|
||||||
|
# All configuration values have a default; values that are commented out
|
||||||
|
# serve to show the default.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# If extensions (or modules to document with autodoc) are in another directory,
|
||||||
|
# add these directories to sys.path here. If the directory is relative to the
|
||||||
|
# documentation root, use os.path.abspath to make it absolute, like shown here.
|
||||||
|
# sys.path.insert(0, os.path.abspath('.'))
|
||||||
|
|
||||||
|
# -- General configuration -----------------------------------------------------
|
||||||
|
|
||||||
|
# If your documentation needs a minimal Sphinx version, state it here.
|
||||||
|
# needs_sphinx = '1.0'
|
||||||
|
|
||||||
|
# Add any Sphinx extension module names here, as strings. They can be extensions
|
||||||
|
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
|
||||||
|
extensions = []
|
||||||
|
|
||||||
|
# Add any paths that contain templates here, relative to this directory.
|
||||||
|
templates_path = ['_templates']
|
||||||
|
|
||||||
|
# The suffix of source filenames.
|
||||||
|
source_suffix = '.rst'
|
||||||
|
|
||||||
|
# The encoding of source files.
|
||||||
|
# source_encoding = 'utf-8-sig'
|
||||||
|
|
||||||
|
# The master toctree document.
|
||||||
|
master_doc = 'index'
|
||||||
|
|
||||||
|
# General information about the project.
|
||||||
|
project = u'data-registries'
|
||||||
|
|
||||||
|
# The version info for the project you're documenting, acts as replacement for
|
||||||
|
# |version| and |release|, also used in various other places throughout the
|
||||||
|
# built documents.
|
||||||
|
#
|
||||||
|
# The short X.Y version.
|
||||||
|
version = '0.1'
|
||||||
|
# The full version, including alpha/beta/rc tags.
|
||||||
|
release = '0.1'
|
||||||
|
|
||||||
|
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||||
|
# for a list of supported languages.
|
||||||
|
# language = None
|
||||||
|
|
||||||
|
# There are two options for replacing |today|: either, you set today to some
|
||||||
|
# non-false value, then it is used:
|
||||||
|
# today = ''
|
||||||
|
# Else, today_fmt is used as the format for a strftime call.
|
||||||
|
# today_fmt = '%B %d, %Y'
|
||||||
|
|
||||||
|
# List of patterns, relative to source directory, that match files and
|
||||||
|
# directories to ignore when looking for source files.
|
||||||
|
exclude_patterns = ['_build']
|
||||||
|
|
||||||
|
# The reST default role (used for this markup: `text`) to use for all documents.
|
||||||
|
# default_role = None
|
||||||
|
|
||||||
|
# If true, '()' will be appended to :func: etc. cross-reference text.
|
||||||
|
# add_function_parentheses = True
|
||||||
|
|
||||||
|
# If true, the current module name will be prepended to all description
|
||||||
|
# unit titles (such as .. function::).
|
||||||
|
# add_module_names = True
|
||||||
|
|
||||||
|
# If true, sectionauthor and moduleauthor directives will be shown in the
|
||||||
|
# output. They are ignored by default.
|
||||||
|
# show_authors = False
|
||||||
|
|
||||||
|
# The name of the Pygments (syntax highlighting) style to use.
|
||||||
|
pygments_style = 'sphinx'
|
||||||
|
|
||||||
|
# A list of ignored prefixes for module index sorting.
|
||||||
|
# modindex_common_prefix = []
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for HTML output ---------------------------------------------------
|
||||||
|
|
||||||
|
# The theme to use for HTML and HTML Help pages. See the documentation for
|
||||||
|
# a list of builtin themes.
|
||||||
|
html_theme = 'default'
|
||||||
|
|
||||||
|
# Theme options are theme-specific and customize the look and feel of a theme
|
||||||
|
# further. For a list of options available for each theme, see the
|
||||||
|
# documentation.
|
||||||
|
# html_theme_options = {}
|
||||||
|
|
||||||
|
# Add any paths that contain custom themes here, relative to this directory.
|
||||||
|
# html_theme_path = []
|
||||||
|
|
||||||
|
# The name for this set of Sphinx documents. If None, it defaults to
|
||||||
|
# "<project> v<release> documentation".
|
||||||
|
# html_title = None
|
||||||
|
|
||||||
|
# A shorter title for the navigation bar. Default is the same as html_title.
|
||||||
|
# html_short_title = None
|
||||||
|
|
||||||
|
# The name of an image file (relative to this directory) to place at the top
|
||||||
|
# of the sidebar.
|
||||||
|
# html_logo = None
|
||||||
|
|
||||||
|
# The name of an image file (within the static path) to use as favicon of the
|
||||||
|
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
|
||||||
|
# pixels large.
|
||||||
|
# html_favicon = None
|
||||||
|
|
||||||
|
# Add any paths that contain custom static files (such as style sheets) here,
|
||||||
|
# relative to this directory. They are copied after the builtin static files,
|
||||||
|
# so a file named "default.css" will overwrite the builtin "default.css".
|
||||||
|
html_static_path = ['_static']
|
||||||
|
|
||||||
|
# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
|
||||||
|
# using the given strftime format.
|
||||||
|
# html_last_updated_fmt = '%b %d, %Y'
|
||||||
|
|
||||||
|
# If true, SmartyPants will be used to convert quotes and dashes to
|
||||||
|
# typographically correct entities.
|
||||||
|
# html_use_smartypants = True
|
||||||
|
|
||||||
|
# Custom sidebar templates, maps document names to template names.
|
||||||
|
# html_sidebars = {}
|
||||||
|
|
||||||
|
# Additional templates that should be rendered to pages, maps page names to
|
||||||
|
# template names.
|
||||||
|
# html_additional_pages = {}
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
# html_domain_indices = True
|
||||||
|
|
||||||
|
# If false, no index is generated.
|
||||||
|
# html_use_index = True
|
||||||
|
|
||||||
|
# If true, the index is split into individual pages for each letter.
|
||||||
|
# html_split_index = False
|
||||||
|
|
||||||
|
# If true, links to the reST sources are added to the pages.
|
||||||
|
# html_show_sourcelink = True
|
||||||
|
|
||||||
|
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
|
||||||
|
# html_show_sphinx = True
|
||||||
|
|
||||||
|
# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
|
||||||
|
# html_show_copyright = True
|
||||||
|
|
||||||
|
# If true, an OpenSearch description file will be output, and all pages will
|
||||||
|
# contain a <link> tag referring to it. The value of this option must be the
|
||||||
|
# base URL from which the finished HTML is served.
|
||||||
|
# html_use_opensearch = ''
|
||||||
|
|
||||||
|
# This is the file name suffix for HTML files (e.g. ".xhtml").
|
||||||
|
# html_file_suffix = None
|
||||||
|
|
||||||
|
# Output file base name for HTML help builder.
|
||||||
|
htmlhelp_basename = 'data-registriesdoc'
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for LaTeX output --------------------------------------------------
|
||||||
|
|
||||||
|
latex_elements = {
|
||||||
|
# The paper size ('letterpaper' or 'a4paper').
|
||||||
|
# 'papersize': 'letterpaper',
|
||||||
|
|
||||||
|
# The font size ('10pt', '11pt' or '12pt').
|
||||||
|
# 'pointsize': '10pt',
|
||||||
|
|
||||||
|
# Additional stuff for the LaTeX preamble.
|
||||||
|
# 'preamble': '',
|
||||||
|
}
|
||||||
|
|
||||||
|
# Grouping the document tree into LaTeX files. List of tuples
|
||||||
|
# (source start file, target name, title, author, documentclass [howto/manual]).
|
||||||
|
latex_documents = [
|
||||||
|
('index',
|
||||||
|
'data-registries.tex',
|
||||||
|
u'data-registries Documentation',
|
||||||
|
u"Andrea Mannocci", 'manual'),
|
||||||
|
]
|
||||||
|
|
||||||
|
# The name of an image file (relative to this directory) to place at the top of
|
||||||
|
# the title page.
|
||||||
|
# latex_logo = None
|
||||||
|
|
||||||
|
# For "manual" documents, if this is true, then toplevel headings are parts,
|
||||||
|
# not chapters.
|
||||||
|
# latex_use_parts = False
|
||||||
|
|
||||||
|
# If true, show page references after internal links.
|
||||||
|
# latex_show_pagerefs = False
|
||||||
|
|
||||||
|
# If true, show URL addresses after external links.
|
||||||
|
# latex_show_urls = False
|
||||||
|
|
||||||
|
# Documents to append as an appendix to all manuals.
|
||||||
|
# latex_appendices = []
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
# latex_domain_indices = True
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for manual page output --------------------------------------------
|
||||||
|
|
||||||
|
# One entry per manual page. List of tuples
|
||||||
|
# (source start file, name, description, authors, manual section).
|
||||||
|
man_pages = [
|
||||||
|
('index', 'data-registries', u'data-registries Documentation',
|
||||||
|
[u"Andrea Mannocci"], 1)
|
||||||
|
]
|
||||||
|
|
||||||
|
# If true, show URL addresses after external links.
|
||||||
|
# man_show_urls = False
|
||||||
|
|
||||||
|
|
||||||
|
# -- Options for Texinfo output ------------------------------------------------
|
||||||
|
|
||||||
|
# Grouping the document tree into Texinfo files. List of tuples
|
||||||
|
# (source start file, target name, title, author,
|
||||||
|
# dir menu entry, description, category)
|
||||||
|
texinfo_documents = [
|
||||||
|
('index', 'data-registries', u'data-registries Documentation',
|
||||||
|
u"Andrea Mannocci", 'data-registries',
|
||||||
|
'A short description of the project.', 'Miscellaneous'),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Documents to append as an appendix to all manuals.
|
||||||
|
# texinfo_appendices = []
|
||||||
|
|
||||||
|
# If false, no module index is generated.
|
||||||
|
# texinfo_domain_indices = True
|
||||||
|
|
||||||
|
# How to display URL addresses: 'footnote', 'no', or 'inline'.
|
||||||
|
# texinfo_show_urls = 'footnote'
|
|
@ -0,0 +1,6 @@
|
||||||
|
Getting started
|
||||||
|
===============
|
||||||
|
|
||||||
|
This is where you describe how to get set up on a clean install, including the
|
||||||
|
commands necessary to get the raw data (using the `sync_data_from_s3` command,
|
||||||
|
for example), and then how to make the cleaned, final data sets.
|
|
@ -0,0 +1,24 @@
|
||||||
|
.. data-registries documentation master file, created by
|
||||||
|
sphinx-quickstart.
|
||||||
|
You can adapt this file completely to your liking, but it should at least
|
||||||
|
contain the root `toctree` directive.
|
||||||
|
|
||||||
|
data-registries documentation!
|
||||||
|
==============================================
|
||||||
|
|
||||||
|
Contents:
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 2
|
||||||
|
|
||||||
|
getting-started
|
||||||
|
commands
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Indices and tables
|
||||||
|
==================
|
||||||
|
|
||||||
|
* :ref:`genindex`
|
||||||
|
* :ref:`modindex`
|
||||||
|
* :ref:`search`
|
|
@ -0,0 +1,190 @@
|
||||||
|
@ECHO OFF
|
||||||
|
|
||||||
|
REM Command file for Sphinx documentation
|
||||||
|
|
||||||
|
if "%SPHINXBUILD%" == "" (
|
||||||
|
set SPHINXBUILD=sphinx-build
|
||||||
|
)
|
||||||
|
set BUILDDIR=_build
|
||||||
|
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
|
||||||
|
set I18NSPHINXOPTS=%SPHINXOPTS% .
|
||||||
|
if NOT "%PAPER%" == "" (
|
||||||
|
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
|
||||||
|
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "" goto help
|
||||||
|
|
||||||
|
if "%1" == "help" (
|
||||||
|
:help
|
||||||
|
echo.Please use `make ^<target^>` where ^<target^> is one of
|
||||||
|
echo. html to make standalone HTML files
|
||||||
|
echo. dirhtml to make HTML files named index.html in directories
|
||||||
|
echo. singlehtml to make a single large HTML file
|
||||||
|
echo. pickle to make pickle files
|
||||||
|
echo. json to make JSON files
|
||||||
|
echo. htmlhelp to make HTML files and a HTML help project
|
||||||
|
echo. qthelp to make HTML files and a qthelp project
|
||||||
|
echo. devhelp to make HTML files and a Devhelp project
|
||||||
|
echo. epub to make an epub
|
||||||
|
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
|
||||||
|
echo. text to make text files
|
||||||
|
echo. man to make manual pages
|
||||||
|
echo. texinfo to make Texinfo files
|
||||||
|
echo. gettext to make PO message catalogs
|
||||||
|
echo. changes to make an overview over all changed/added/deprecated items
|
||||||
|
echo. linkcheck to check all external links for integrity
|
||||||
|
echo. doctest to run all doctests embedded in the documentation if enabled
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "clean" (
|
||||||
|
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
|
||||||
|
del /q /s %BUILDDIR%\*
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "html" (
|
||||||
|
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "dirhtml" (
|
||||||
|
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "singlehtml" (
|
||||||
|
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "pickle" (
|
||||||
|
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can process the pickle files.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "json" (
|
||||||
|
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can process the JSON files.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "htmlhelp" (
|
||||||
|
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can run HTML Help Workshop with the ^
|
||||||
|
.hhp project file in %BUILDDIR%/htmlhelp.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "qthelp" (
|
||||||
|
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; now you can run "qcollectiongenerator" with the ^
|
||||||
|
.qhcp project file in %BUILDDIR%/qthelp, like this:
|
||||||
|
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\data-registries.qhcp
|
||||||
|
echo.To view the help file:
|
||||||
|
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\data-registries.ghc
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "devhelp" (
|
||||||
|
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "epub" (
|
||||||
|
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The epub file is in %BUILDDIR%/epub.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "latex" (
|
||||||
|
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "text" (
|
||||||
|
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The text files are in %BUILDDIR%/text.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "man" (
|
||||||
|
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The manual pages are in %BUILDDIR%/man.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "texinfo" (
|
||||||
|
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "gettext" (
|
||||||
|
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "changes" (
|
||||||
|
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.The overview file is in %BUILDDIR%/changes.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "linkcheck" (
|
||||||
|
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Link check complete; look for any errors in the above output ^
|
||||||
|
or in %BUILDDIR%/linkcheck/output.txt.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
if "%1" == "doctest" (
|
||||||
|
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
|
||||||
|
if errorlevel 1 exit /b 1
|
||||||
|
echo.
|
||||||
|
echo.Testing of doctests in the sources finished, look at the ^
|
||||||
|
results in %BUILDDIR%/doctest/output.txt.
|
||||||
|
goto end
|
||||||
|
)
|
||||||
|
|
||||||
|
:end
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,10 @@
|
||||||
|
# local package
|
||||||
|
-e .
|
||||||
|
|
||||||
|
# external requirements
|
||||||
|
click
|
||||||
|
Sphinx
|
||||||
|
coverage
|
||||||
|
awscli
|
||||||
|
flake8
|
||||||
|
python-dotenv>=0.5.1
|
|
@ -0,0 +1,10 @@
|
||||||
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name='src',
|
||||||
|
packages=find_packages(),
|
||||||
|
version='0.1.0',
|
||||||
|
description='A short description of the project.',
|
||||||
|
author='Andrea Mannocci',
|
||||||
|
license='MIT',
|
||||||
|
)
|
|
@ -0,0 +1,60 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import csv
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import click
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from dotenv import find_dotenv, load_dotenv
|
||||||
|
|
||||||
|
|
||||||
|
def get_value_or_none(obj, key):
|
||||||
|
if key in obj:
|
||||||
|
return obj[key]['value']
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
@click.command()
|
||||||
|
@click.argument('input_filepath', type=click.Path(exists=True))
|
||||||
|
@click.argument('output_filepath', type=click.Path())
|
||||||
|
def main(input_filepath, output_filepath):
|
||||||
|
""" Runs data processing scripts to turn raw data from (../raw) into
|
||||||
|
cleaned data ready to be analyzed (saved in ../processed).
|
||||||
|
"""
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
logger.info('making final data set from raw data')
|
||||||
|
|
||||||
|
with open(os.path.join(input_filepath, 'OpenAIRE_DS_re3data_opendoar.json'), mode='r') as f:
|
||||||
|
with open(os.path.join(output_filepath, 're3data_opendoar.csv'), mode='w') as csvfile:
|
||||||
|
csv_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
|
||||||
|
csv_writer.writerow(['id', 'url', 'official_name', 'english_name', 'description', 'latitude', 'longitude', 'subjects'])
|
||||||
|
|
||||||
|
for line in f:
|
||||||
|
repo = json.loads(line)
|
||||||
|
identifier = repo['id']
|
||||||
|
official_name = repo['officialname']['value']
|
||||||
|
url = get_value_or_none(repo, 'websiteurl')
|
||||||
|
english_name = get_value_or_none(repo, 'englishname')
|
||||||
|
description = get_value_or_none(repo, 'description')
|
||||||
|
latitude = get_value_or_none(repo, 'latitude')
|
||||||
|
longitude = get_value_or_none(repo, 'longitude')
|
||||||
|
|
||||||
|
subjects = []
|
||||||
|
for s in repo['subjects']:
|
||||||
|
subjects.append(s['value'])
|
||||||
|
|
||||||
|
csv_writer.writerow([identifier, url, official_name, english_name, description, latitude, longitude, subjects])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||||
|
logging.basicConfig(level=logging.INFO, format=log_fmt)
|
||||||
|
|
||||||
|
# not used in this stub but often useful for finding various files
|
||||||
|
project_dir = Path(__file__).resolve().parents[2]
|
||||||
|
|
||||||
|
# find .env automagically by walking up directories until it's found, then
|
||||||
|
# load up the .env entries as environment variables
|
||||||
|
load_dotenv(find_dotenv())
|
||||||
|
|
||||||
|
main()
|
|
@ -0,0 +1,25 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
|
REQUIRED_PYTHON = "python3"
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
system_major = sys.version_info.major
|
||||||
|
if REQUIRED_PYTHON == "python":
|
||||||
|
required_major = 2
|
||||||
|
elif REQUIRED_PYTHON == "python3":
|
||||||
|
required_major = 3
|
||||||
|
else:
|
||||||
|
raise ValueError("Unrecognized python interpreter: {}".format(
|
||||||
|
REQUIRED_PYTHON))
|
||||||
|
|
||||||
|
if system_major != required_major:
|
||||||
|
raise TypeError(
|
||||||
|
"This project requires Python {}. Found: Python {}".format(
|
||||||
|
required_major, sys.version))
|
||||||
|
else:
|
||||||
|
print(">>> Development environment passes all tests!")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Loading…
Reference in New Issue