diff --git a/.env.template b/.env.template
index 864d5d4..9637c38 100644
--- a/.env.template
+++ b/.env.template
@@ -1,3 +1,12 @@
+# Container names
+NGINX_CONTAINER_NAME=nginx
+REDIS_CONTAINER_NAME=redis
+POSTGRESQL_CONTAINER_NAME=db
+SOLR_CONTAINER_NAME=solr
+DATAPUSHER_CONTAINER_NAME=datapusher
+CKAN_CONTAINER_NAME=ckan
+WORKER_CONTAINER_NAME=ckan-worker
+
 # CKAN databases
 POSTGRES_USER=ckan
 POSTGRES_PASSWORD=ckan
@@ -14,6 +23,7 @@ TEST_CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore_test
 TEST_CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore_test
 
 # CKAN core
+CKAN_VERSION=2.9.5
 CKAN_SITE_ID=default
 CKAN_SITE_URL=http://ckan:5000
 CKAN_PORT=5000
@@ -29,12 +39,12 @@ CKAN_SMTP_MAIL_FROM=ckan@localhost
 TZ=UTC
 
 # Solr
-SOLR_VERSION=6.6.6
+SOLR_IMAGE_VERSION=2.9-solr8
 CKAN_SOLR_URL=http://solr:8983/solr/ckan
 TEST_CKAN_SOLR_URL=http://solr:8983/solr/ckan
 
 # Redis
-REDIS_VERSION=6.0.7
+REDIS_VERSION=6
 CKAN_REDIS_URL=redis://redis:6379/1
 TEST_CKAN_REDIS_URL=redis://redis:6379/1
 
@@ -46,7 +56,7 @@ DATAPUSHER_REWRITE_RESOURCES=True
 DATAPUSHER_REWRITE_URL=http://ckan:5000
 
 # Extensions
-CKAN__PLUGINS=envvars image_view text_view recline_view datastore datapusher
+CKAN__PLUGINS="envvars image_view text_view recline_view datastore datapusher"
 CKAN__HARVEST__MQ__TYPE=redis
 CKAN__HARVEST__MQ__HOSTNAME=redis
 CKAN__HARVEST__MQ__PORT=6379
diff --git a/README.txt b/README.txt
index b5ae896..e8d1121 100644
--- a/README.txt
+++ b/README.txt
@@ -14,7 +14,8 @@ All the other images should live in separate repos
 latest image to used is redis:6
 4. nginx - base image: nginx:1.19.8-alpine from DockerHub, enhanced in a Dockerfile)
 5. DataPusher - built from the actual datapusher repo (https://github.com/ckan/datapusher)
-6. CKAN Worker - add new (ckan worker) container in the compose setup
+6. CKAN - built from the ckan/ckan-base:2.9.5 base image (which is built from the ckan/ckan-docker-base repo)
+7. CKAN Worker - add new (ckan worker) container in the compose setup
 
 Versions 2.9 and 2.10 (when it's out) only.
 Plan the repo layout for having multiple versions - OKFN could used as an example
diff --git a/ckan/setup/prerun.py b/ckan/setup/prerun.py
new file mode 100644
index 0000000..58c4b7c
--- /dev/null
+++ b/ckan/setup/prerun.py
@@ -0,0 +1,210 @@
+import json
+import os
+import sys
+import subprocess
+import psycopg2
+try:
+    from urllib.request import urlopen
+    from urllib.error import URLError
+except ImportError:
+    from urllib2 import urlopen
+    from urllib2 import URLError
+
+import time
+import re
+
+ckan_ini = os.environ.get("CKAN_INI", "/srv/app/ckan.ini")
+
+RETRY = 5
+
+
+def update_plugins():
+    """Write the CKAN__PLUGINS env value to ckan.plugins using `ckan config-tool`."""
+    plugins = os.environ.get("CKAN__PLUGINS", "")
+    print(("[prerun] Setting the following plugins in {}:".format(ckan_ini)))
+    print(plugins)
+    cmd = ["ckan", "config-tool", ckan_ini, "ckan.plugins = {}".format(plugins)]
+    subprocess.check_output(cmd, stderr=subprocess.STDOUT)
+    print("[prerun] Plugins set.")
+
+
+def check_main_db_connection(retry=None):
+    """Check that the CKAN_SQLALCHEMY_URL database is reachable; skip if unset."""
+    conn_str = os.environ.get("CKAN_SQLALCHEMY_URL")
+    if not conn_str:
+        print("[prerun] CKAN_SQLALCHEMY_URL not defined, not checking db")
+        return
+    return check_db_connection(conn_str, retry)
+
+
+def check_datastore_db_connection(retry=None):
+    """Check that the CKAN_DATASTORE_WRITE_URL database is reachable; skip if unset."""
+    conn_str = os.environ.get("CKAN_DATASTORE_WRITE_URL")
+    if not conn_str:
+        print("[prerun] CKAN_DATASTORE_WRITE_URL not defined, not checking db")
+        return
+    return check_db_connection(conn_str, retry)
+
+
+def check_db_connection(conn_str, retry=None):
+    """Connect to conn_str, retrying every 10s; exit(1) once retries run out."""
+    if retry is None:
+        retry = RETRY
+    elif retry == 0:
+        print("[prerun] Giving up after 5 tries...")
+        sys.exit(1)
+
+    try:
+        connection = psycopg2.connect(conn_str)
+
+    except psycopg2.Error as e:
+        print(str(e))
+        print("[prerun] Unable to connect to the database, waiting...")
+        time.sleep(10)
+        check_db_connection(conn_str, retry=retry - 1)
+    else:
+        connection.close()
+
+
+def check_solr_connection(retry=None):
+    """Query CKAN_SOLR_URL, retrying every 10s; exit(1) once retries run out."""
+    if retry is None:
+        retry = RETRY
+    elif retry == 0:
+        print("[prerun] Giving up after 5 tries...")
+        sys.exit(1)
+
+    url = os.environ.get("CKAN_SOLR_URL", "")
+    search_url = "{url}/select/?q=*&wt=json".format(url=url)
+
+    try:
+        connection = urlopen(search_url)
+    except URLError as e:
+        print(str(e))
+        print("[prerun] Unable to connect to solr, waiting...")
+        time.sleep(10)
+        check_solr_connection(retry=retry - 1)
+    else:
+        # Parse as JSON; eval() chokes on true/false/null and runs the payload
+        json.loads(connection.read().decode("utf-8"))
+
+
+def init_db():
+    """Run `ckan db init`; exit(1) on OperationalError, re-raise other failures."""
+    db_command = ["ckan", "-c", ckan_ini, "db", "init"]
+    print("[prerun] Initializing or upgrading db - start")
+    try:
+        subprocess.check_output(db_command, stderr=subprocess.STDOUT)
+        print("[prerun] Initializing or upgrading db - end")
+    except subprocess.CalledProcessError as e:
+        # e.output is bytes on Python 3, so test against a bytes literal
+        if b"OperationalError" in e.output:
+            print(e.output)
+            print("[prerun] Database not ready, waiting a bit before exit...")
+            time.sleep(5)
+            sys.exit(1)
+        else:
+            print(e.output)
+            raise e
+
+
+def init_datastore_db():
+    """Apply the SQL printed by `ckan datastore set-permissions` to the datastore db."""
+    conn_str = os.environ.get("CKAN_DATASTORE_WRITE_URL")
+    if not conn_str:
+        print("[prerun] Skipping datastore initialization")
+        return
+
+    datastore_perms_command = ["ckan", "-c", ckan_ini, "datastore", "set-permissions"]
+
+    connection = psycopg2.connect(conn_str)
+    cursor = connection.cursor()
+
+    print("[prerun] Initializing datastore db - start")
+    try:
+        datastore_perms = subprocess.Popen(
+            datastore_perms_command, stdout=subprocess.PIPE
+        )
+
+        perms_sql = datastore_perms.stdout.read()
+        # Remove internal pg command as psycopg2 does not like it
+        perms_sql = re.sub(b'\\\\connect "(.*)"', b"", perms_sql)
+        cursor.execute(perms_sql)
+        for notice in connection.notices:
+            print(notice)
+
+        connection.commit()
+
+        print("[prerun] Initializing datastore db - end")
+        print(datastore_perms.stdout.read())
+    except psycopg2.Error as e:
+        print("[prerun] Could not initialize datastore")
+        print(str(e))
+
+    except subprocess.CalledProcessError as e:
+        # e.output is bytes on Python 3, so test against a bytes literal
+        if b"OperationalError" in e.output:
+            print(e.output)
+            print("[prerun] Database not ready, waiting a bit before exit...")
+            time.sleep(5)
+            sys.exit(1)
+        else:
+            print(e.output)
+            raise e
+    finally:
+        cursor.close()
+        connection.close()
+
+
+def create_sysadmin():
+    """Create the CKAN_SYSADMIN_* user as a sysadmin unless it already exists."""
+    name = os.environ.get("CKAN_SYSADMIN_NAME")
+    password = os.environ.get("CKAN_SYSADMIN_PASSWORD")
+    email = os.environ.get("CKAN_SYSADMIN_EMAIL")
+
+    if name and password and email:
+
+        # Check if user exists
+        command = ["ckan", "-c", ckan_ini, "user", "show", name]
+
+        out = subprocess.check_output(command)
+        if b"User:None" not in re.sub(br"\s", b"", out):
+            print("[prerun] Sysadmin user exists, skipping creation")
+            return
+
+        # Create user
+        command = [
+            "ckan",
+            "-c",
+            ckan_ini,
+            "user",
+            "add",
+            name,
+            "password=" + password,
+            "email=" + email,
+        ]
+
+        subprocess.call(command)
+        print("[prerun] Created user {0}".format(name))
+
+        # Make it sysadmin
+        command = ["ckan", "-c", ckan_ini, "sysadmin", "add", name]
+
+        subprocess.call(command)
+        print("[prerun] Made user {0} a sysadmin".format(name))
+
+
+if __name__ == "__main__":
+
+    maintenance = os.environ.get("MAINTENANCE_MODE", "").lower() == "true"
+
+    if maintenance:
+        print("[prerun] Maintenance mode, skipping setup...")
+    else:
+        check_main_db_connection()
+        init_db()
+        update_plugins()
+        check_datastore_db_connection()
+        init_datastore_db()
+        check_solr_connection()
+        create_sysadmin()
diff --git a/ckan/setup/start_ckan.sh b/ckan/setup/start_ckan.sh
new file mode 100755
index 0000000..e1fb8e1
--- /dev/null
+++ b/ckan/setup/start_ckan.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Run the prerun script to init CKAN and create the default admin user
+sudo -u ckan -EH python3 prerun.py
+# Save the prerun exit status now; every later command overwrites $?
+PRERUN_STATUS=$?
+
+# Run any startup scripts provided by images extending this one
+if [[ -d "/docker-entrypoint.d" ]]
+then
+    for f in /docker-entrypoint.d/*; do
+        case "$f" in
+            *.sh) echo "$0: Running init file $f"; . "$f" ;;
+            *.py) echo "$0: Running init file $f"; python3 "$f"; echo ;;
+            *) echo "$0: Ignoring $f (not an sh or py file)" ;;
+        esac
+        echo
+    done
+fi
+
+# Set the common uwsgi options
+UWSGI_OPTS="--plugins http,python \
+    --socket /tmp/uwsgi.sock \
+    --wsgi-file /srv/app/wsgi.py \
+    --module wsgi:application \
+    --uid 92 --gid 92 \
+    --http 0.0.0.0:5000 \
+    --master --enable-threads \
+    --lazy-apps \
+    -p 2 -L -b 32768 --vacuum \
+    --harakiri $UWSGI_HARAKIRI"
+
+if [ "$PRERUN_STATUS" -eq 0 ]
+then
+    # Start supervisord
+    supervisord --configuration /etc/supervisord.conf &
+    # Start uwsgi
+    sudo -u ckan -EH uwsgi $UWSGI_OPTS
+else
+    echo "[prerun] failed...not starting CKAN."
+fi
diff --git a/ckan/setup/supervisord.conf b/ckan/setup/supervisord.conf
new file mode 100644
index 0000000..a3f6671
--- /dev/null
+++ b/ckan/setup/supervisord.conf
@@ -0,0 +1,23 @@
+[unix_http_server]
+file = /tmp/supervisor.sock
+chmod = 0777
+chown = nobody:nogroup
+
+[supervisord]
+logfile = /tmp/supervisord.log
+logfile_maxbytes = 50MB
+logfile_backups=10
+loglevel = info
+pidfile = /tmp/supervisord.pid
+nodaemon = true
+umask = 022
+identifier = supervisor
+
+[supervisorctl]
+serverurl = unix:///tmp/supervisor.sock
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[include]
+files = /etc/supervisord.d/*.conf
diff --git a/ckan/setup/uwsgi.conf b/ckan/setup/uwsgi.conf
new file mode 100644
index 0000000..6321d6d
--- /dev/null
+++ b/ckan/setup/uwsgi.conf
@@ -0,0 +1,2 @@
+[uwsgi]
+route = ^(?!/api).*$ basicauth:Restricted,/srv/app/.htpasswd
diff --git a/docker-compose.yml b/docker-compose.yml
index ecc0d45..085e5b2 100755
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -26,6 +26,8 @@ services:
       dockerfile: Dockerfile
       args:
         - TZ=${TZ}
+    env_file:
+      - .env
     depends_on:
       db:
         condition: service_healthy