diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml
index 3a3e7bb..db19611 100755
--- a/docker-compose.dev.yml
+++ b/docker-compose.dev.yml
@@ -23,8 +23,7 @@ services:
 
   datapusher:
     container_name: datapusher
-    build:
-      context: datapusher/
+    image: kowhai/datapusher:0.0.17
     ports:
       - "8800:8800"
 
diff --git a/images/ckan-ext/Dockerfile b/images/ckan-ext/Dockerfile
new file mode 100755
index 0000000..0aa8caa
--- /dev/null
+++ b/images/ckan-ext/Dockerfile
@@ -0,0 +1,113 @@
+FROM alpine:3.7
+
+# Internal environment variables
+ENV APP_DIR=/srv/app
+ENV SRC_DIR=/srv/app/src
+ENV CKAN_INI=${APP_DIR}/ckan.ini
+ENV PIP_SRC=${SRC_DIR}
+ENV CKAN_STORAGE_PATH=/var/lib/ckan
+ENV GIT_URL=https://github.com/ckan/ckan.git
+# CKAN version to build
+ENV GIT_BRANCH=ckan-2.9.3
+# Customize these on the .env file if needed
+ENV CKAN_SITE_URL=http://localhost:5000
+ENV CKAN__PLUGINS="image_view text_view recline_view datastore datapusher envvars"
+
+WORKDIR ${APP_DIR}
+
+# Install necessary packages to run CKAN
+RUN apk add --no-cache tzdata \
+        git \
+        gettext \
+        postgresql-client \
+        python3 \
+        apache2-utils \
+        libxml2 \
+        libxslt \
+        musl-dev \
+        uwsgi-http \
+        uwsgi-corerouter \
+        uwsgi-python3 \
+        py3-gevent \
+        uwsgi-gevent \
+        libmagic \
+        curl \
+        sudo && \
+    # Packages to build CKAN requirements and plugins
+    apk add --no-cache --virtual .build-deps \
+        postgresql-dev \
+        gcc \
+        make \
+        g++ \
+        autoconf \
+        automake \
+        libtool \
+        python3-dev \
+        py3-virtualenv \
+        libxml2-dev \
+        libxslt-dev \
+        linux-headers && \
+    # Create SRC_DIR
+    mkdir -p ${SRC_DIR}
+
+# Install pip
+RUN curl -o ${SRC_DIR}/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
+    python3 ${SRC_DIR}/get-pip.py
+
+# Set up Python3 virtual environment
+RUN cd ${APP_DIR} && \
+    python3 -m venv ${APP_DIR} && \
+    source ${APP_DIR}/bin/activate
+
+# Virtual environment binaries/scripts to be used first
+ENV PATH=${APP_DIR}/bin:${PATH}
+
+# Install CKAN, uwsgi plus extensions
+RUN pip3 install -e git+${GIT_URL}@${GIT_BRANCH}#egg=ckan && \
+    pip3 install uwsgi && \
+    cd ${SRC_DIR}/ckan && \
+    cp who.ini ${APP_DIR} && \
+    pip install --no-binary :all: -r requirements.txt && \
+    # Install CKAN envvars to support loading config from environment variables
+    pip3 install -e git+https://github.com/okfn/ckanext-envvars.git#egg=ckanext-envvars && \
+    # Install CKAN extensions
+    pip3 install -e 'git+https://github.com/DataShades/ckanext-xloader@py3#egg=ckanext-xloader' && \
+    pip3 install -r ${SRC_DIR}/ckanext-xloader/requirements.txt && \
+    pip3 install -U requests[security] && \
+    pip3 install -e 'git+https://github.com/DataShades/ckanext-harvest.git@py3#egg=ckanext-harvest' && \
+    pip3 install -r ${SRC_DIR}/ckanext-harvest/pip-requirements.txt && \
+    pip3 install -e 'git+https://github.com/DataShades/ckanext-syndicate@py3#egg=ckanext-syndicate' && \
+    pip3 install -r ${SRC_DIR}/ckanext-syndicate/requirements.txt && \
+    pip3 install -e 'git+https://github.com/ckan/ckanext-scheming.git@master#egg=ckanext-scheming' && \
+    pip3 install -r ${SRC_DIR}/ckanext-scheming/requirements.txt
+
+# Create and update CKAN config
+RUN ckan generate config ${CKAN_INI}
+
+# Install and configure supervisor
+RUN pip3 install supervisor && \
+    mkdir /etc/supervisord.d
+
+# Copy all setup files
+COPY setup ${APP_DIR}
+COPY setup/supervisor.worker.conf /etc/supervisord.d/worker.conf
+COPY setup/supervisord.conf /etc/supervisord.conf
+
+# Create a local user and group to run the app
+RUN addgroup -g 92 -S ckan && \
+    adduser -u 92 -h /srv/app -H -D -S -G ckan ckan
+
+# Create local storage folder
+RUN mkdir -p $CKAN_STORAGE_PATH && \
+    chown -R ckan:ckan $CKAN_STORAGE_PATH
+
+# Create entrypoint directory for children image scripts
+ONBUILD RUN mkdir /docker-entrypoint.d
+
+RUN chown ckan -R /srv/app
+
+EXPOSE 5000
+
+HEALTHCHECK --interval=10s --timeout=5s --retries=5 CMD curl --fail http://localhost:5000/api/3/action/status_show || exit 1
+
+CMD ["/srv/app/start_ckan.sh"]
\ No newline at end of file
diff --git a/images/ckan-ext/setup/ckan-uwsgi.ini b/images/ckan-ext/setup/ckan-uwsgi.ini
new file mode 100644
index 0000000..2361f36
--- /dev/null
+++ b/images/ckan-ext/setup/ckan-uwsgi.ini
@@ -0,0 +1,15 @@
+[uwsgi]
+http-socket = :5000
+uid = ckan
+gid = ckan
+plugins = python3
+wsgi-file = /srv/app/wsgi.py
+virtualenv = /srv/app
+module = wsgi:application
+master = true
+processes = 5
+pidfile = /tmp/%n.pid
+harakiri = 50
+max-requests = 5000
+vacuum = true
+callable = application
diff --git a/images/ckan-ext/setup/prerun.py b/images/ckan-ext/setup/prerun.py
new file mode 100755
index 0000000..f8dd693
--- /dev/null
+++ b/images/ckan-ext/setup/prerun.py
@@ -0,0 +1,211 @@
+import os
+import sys
+import subprocess
+import psycopg2
+import urllib3
+import time
+import re
+
+ckan_ini = os.environ.get('CKAN_INI', '/srv/app/ckan.ini')
+
+RETRY = 5
+
+def update_plugins():
+    """Write the CKAN__PLUGINS value to ckan.plugins in the ini file."""
+
+    plugins = os.environ.get('CKAN__PLUGINS', '')
+    print('[prerun] Setting the following plugins in {}:'.format(ckan_ini))
+    print(plugins)
+    cmd = ['ckan', 'config-tool', ckan_ini,
+           'ckan.plugins = {}'.format(plugins)]
+    subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+    print('[prerun] Plugins set.')
+
+
+def check_main_db_connection(retry=None):
+    """Check the CKAN_SQLALCHEMY_URL database; skip if it is not defined."""
+
+    conn_str = os.environ.get('CKAN_SQLALCHEMY_URL')
+    if not conn_str:
+        print('[prerun] CKAN_SQLALCHEMY_URL not defined, not checking db')
+        return
+    return check_db_connection(conn_str, retry)
+
+
+def check_datastore_db_connection(retry=None):
+    """Check the CKAN_DATASTORE_WRITE_URL database; skip if not defined."""
+
+    conn_str = os.environ.get('CKAN_DATASTORE_WRITE_URL')
+    if not conn_str:
+        print('[prerun] CKAN_DATASTORE_WRITE_URL not defined, not checking db')
+        return
+    return check_db_connection(conn_str, retry)
+
+
+def check_db_connection(conn_str, retry=None):
+    """Connect to conn_str, retrying (RETRY times by default), else exit 1."""
+
+    if retry is None:
+        retry = RETRY
+    elif retry == 0:
+        print('[prerun] Giving up after {} tries...'.format(RETRY))
+        sys.exit(1)
+
+    try:
+        connection = psycopg2.connect(conn_str)
+
+    except psycopg2.Error as e:
+        print(str(e))
+        print('[prerun] Unable to connect to the database, waiting...')
+        time.sleep(10)
+        check_db_connection(conn_str, retry=retry - 1)
+    else:
+        connection.close()
+
+
+def check_solr_connection(retry=None):
+    """Query CKAN_SOLR_URL, retrying (RETRY times by default), else exit 1."""
+
+    if retry is None:
+        retry = RETRY
+    elif retry == 0:
+        print('[prerun] Giving up after {} tries...'.format(RETRY))
+        sys.exit(1)
+
+    url = os.environ.get('CKAN_SOLR_URL', '')
+    search_url = '{url}/select/?q=*&wt=json'.format(url=url)
+    http = urllib3.PoolManager()
+    try:
+        r = http.request('GET', search_url)
+    except urllib3.exceptions.HTTPError as e:
+        # Connection failures surface as MaxRetryError once PoolManager's
+        # own retries run out; HTTPError is the urllib3 base exception.
+        print(str(e))
+        print('[prerun] Unable to connect to solr, waiting...')
+        time.sleep(10)
+        check_solr_connection(retry=retry - 1)
+    else:
+        print('[prerun] Connection Status from SOLR is ', (r.status))
+
+def init_db():
+    """Run ``ckan db init``; exit 1 on OperationalError, re-raise otherwise."""
+
+    db_command = ['ckan', '-c', ckan_ini,
+                  'db', 'init']
+    print('[prerun] Initializing or upgrading db - start')
+    try:
+        subprocess.check_output(db_command, stderr=subprocess.STDOUT)
+        print('[prerun] Initializing or upgrading db - end')
+    except subprocess.CalledProcessError as e:
+        # check_output captures bytes; decode before the substring test
+        output = e.output.decode('utf-8', 'replace')
+        if 'OperationalError' in output:
+            print(output)
+            print('[prerun] Database not ready, waiting a bit before exit...')
+            time.sleep(5)
+            sys.exit(1)
+        else:
+            print(output)
+            raise e
+
+
+def init_datastore_db():
+    """Apply ``ckan datastore set-permissions`` SQL to the datastore db."""
+
+    conn_str = os.environ.get('CKAN_DATASTORE_WRITE_URL')
+    if not conn_str:
+        print('[prerun] Skipping datastore initialization')
+        return
+
+    datastore_perms_command = ['ckan', '-c', ckan_ini,
+                               'datastore', 'set-permissions']
+
+    connection = psycopg2.connect(conn_str)
+    cursor = connection.cursor()
+
+    print('[prerun] Initializing datastore db - start')
+    try:
+        datastore_perms = subprocess.Popen(
+            datastore_perms_command,
+            stdout=subprocess.PIPE)
+
+        perms_sql = datastore_perms.stdout.read().decode('utf-8')
+        # Remove internal pg command as psycopg2 does not like it
+        perms_sql = re.sub('\\\\connect \"(.*)\"', '', perms_sql)
+        cursor.execute(perms_sql)
+        for notice in connection.notices:
+            print(notice)
+
+        connection.commit()
+
+        print('[prerun] Initializing datastore db - end')
+        # The pipe was drained above, so print the SQL that was read from it
+        print(perms_sql)
+    except psycopg2.Error as e:
+        print('[prerun] Could not initialize datastore')
+        print(str(e))
+
+    except subprocess.CalledProcessError as e:
+        # NOTE(review): Popen itself never raises this; kept for parity
+        output = e.output.decode('utf-8', 'replace')
+        if 'OperationalError' in output:
+            print(output)
+            print('[prerun] Database not ready, waiting a bit before exit...')
+            time.sleep(5)
+            sys.exit(1)
+        else:
+            print(output)
+            raise e
+    finally:
+        cursor.close()
+        connection.close()
+
+
+def create_sysadmin():
+    """Create the CKAN_SYSADMIN_* user as a sysadmin unless it exists."""
+
+    name = os.environ.get('CKAN_SYSADMIN_NAME')
+    password = os.environ.get('CKAN_SYSADMIN_PASSWORD')
+    email = os.environ.get('CKAN_SYSADMIN_EMAIL')
+
+    if name and password and email:
+
+        # Check if user exists
+        command = ['ckan', '-c', ckan_ini, 'user', 'show', name,]
+
+        out = subprocess.check_output(command)
+        if 'User:None' not in re.sub(r'\s', '', out.decode()):
+            print('[prerun] Sysadmin user exists, skipping creation')
+            return
+
+        # Create user
+        command = ['ckan', '-c', ckan_ini, 'user', 'add',
+                   name,
+                   'password=' + password,
+                   'email=' + email]
+
+        subprocess.call(command)
+        print('[prerun] Created user {0}'.format(name))
+
+        # Make it sysadmin
+        command = ['ckan', '-c', ckan_ini, 'sysadmin', 'add',
+                   name]
+
+        subprocess.call(command)
+        print('[prerun] Made user {0} a sysadmin'.format(name))
+
+
+if __name__ == '__main__':
+
+    maintenance = os.environ.get('MAINTENANCE_MODE', '').lower() == 'true'
+
+    if maintenance:
+        print('[prerun] Maintenance mode, skipping setup...')
+    else:
+        check_main_db_connection()
+        init_db()
+        update_plugins()
+        check_datastore_db_connection()
+        init_datastore_db()
+        check_solr_connection()
+        create_sysadmin()
diff --git a/images/ckan-ext/setup/start_ckan.sh b/images/ckan-ext/setup/start_ckan.sh
new file mode 100755
index 0000000..5a119e6
--- /dev/null
+++ b/images/ckan-ext/setup/start_ckan.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Run the prerun script to init CKAN and create the default admin user
+sudo -u ckan -EH python3 prerun.py
+prerun_status=$?
+
+# Run any startup scripts provided by images extending this one
+if [[ -d "/docker-entrypoint.d" ]]
+then
+    for f in /docker-entrypoint.d/*; do
+        case "$f" in
+            *.sh)     echo "$0: Running init file $f"; . "$f" ;;
+            *.py)     echo "$0: Running init file $f"; python "$f"; echo ;;
+            *)        echo "$0: Ignoring $f (not an sh or py file)" ;;
+        esac
+        echo
+    done
+fi
+
+
+# Start CKAN only if the prerun script succeeded; when PASSWORD_PROTECT is true, generate the htpasswd file first
+if [ "$prerun_status" -eq 0 ]
+then
+    if [ "$PASSWORD_PROTECT" = true ]
+    then
+        if [ "$HTPASSWD_USER" ] && [ "$HTPASSWD_PASSWORD" ]
+        then
+            # Generate htpasswd file for basicauth
+            htpasswd -d -b -c /srv/app/.htpasswd "$HTPASSWD_USER" "$HTPASSWD_PASSWORD"
+            # Start supervisord
+            supervisord --configuration /etc/supervisord.conf &
+            # Start uwsgi with basicauth
+            sudo -u ckan -EH uwsgi -i ckan-uwsgi.ini
+        else
+            echo "Missing HTPASSWD_USER or HTPASSWD_PASSWORD environment variables. Exiting..."
+            exit 1
+        fi
+    else
+        # Start supervisord
+        supervisord --configuration /etc/supervisord.conf &
+        # Start uwsgi
+        sudo -u ckan -EH uwsgi -i ckan-uwsgi.ini
+    fi
+else
+    echo "[prerun] failed...not starting CKAN."
+fi
+
diff --git a/images/ckan-ext/setup/supervisor.worker.conf b/images/ckan-ext/setup/supervisor.worker.conf
new file mode 100644
index 0000000..9d46f37
--- /dev/null
+++ b/images/ckan-ext/setup/supervisor.worker.conf
@@ -0,0 +1,12 @@
+[program:ckan-worker]
+command=ckan -c /srv/app/ckan.ini jobs worker
+priority=501
+autostart=true
+autorestart=true
+redirect_stderr=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stdout
+stderr_logfile_maxbytes=0
+user=ckan
+environment=HOME="/srv/app",USER="ckan"
\ No newline at end of file
diff --git a/images/ckan-ext/setup/supervisord.conf b/images/ckan-ext/setup/supervisord.conf
new file mode 100644
index 0000000..052dbc5
--- /dev/null
+++ b/images/ckan-ext/setup/supervisord.conf
@@ -0,0 +1,23 @@
+[unix_http_server]
+file = /tmp/supervisor.sock
+chmod = 0777
+chown = nobody:nogroup
+
+[supervisord]
+logfile = /tmp/supervisord.log
+logfile_maxbytes = 50MB
+logfile_backups=10
+loglevel = info
+pidfile = /tmp/supervisord.pid
+nodaemon = true
+umask = 022
+identifier = supervisor
+
+[supervisorctl]
+serverurl = unix:///tmp/supervisor.sock
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[include]
+files = /etc/supervisord.d/*.conf
\ No newline at end of file
diff --git a/images/ckan-ext/setup/wsgi.py b/images/ckan-ext/setup/wsgi.py
new file mode 100644
index 0000000..b37d80e
--- /dev/null
+++ b/images/ckan-ext/setup/wsgi.py
@@ -0,0 +1,9 @@
+import os
+from ckan.config.middleware import make_app
+from ckan.cli import CKANConfigLoader
+from logging.config import fileConfig as loggingFileConfig
+config_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ckan.ini')
+abspath = os.path.join(os.path.dirname(os.path.abspath(__file__)))
+loggingFileConfig(config_filepath)
+config = CKANConfigLoader(config_filepath).get_config()
+application = make_app(config)