Switch the image from building CKAN to building DataPusher.

The Dockerfile now clones and installs datapusher, and supervisord starts it
through uWSGI on port 8800 as the www-data user.

Notes on the added files:
- setup/datapusher-uwsgi.ini sets the group with uWSGI's "gid" option.
- setup/datapusher_settings.py parses integer and boolean environment
  overrides explicitly, because os.environ only ever yields strings.
- setup/start_datapusher.sh execs supervisord so that it replaces the shell
  started by the image CMD.

diff --git a/Dockerfile b/Dockerfile
index 0aa8caa..204b367 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,113 +1,85 @@
-FROM alpine:3.7
+FROM alpine:3.13.5
 
-# Internal environment variables
 ENV APP_DIR=/srv/app
-ENV SRC_DIR=/srv/app/src
-ENV CKAN_INI=${APP_DIR}/ckan.ini
-ENV PIP_SRC=${SRC_DIR}
-ENV CKAN_STORAGE_PATH=/var/lib/ckan
-ENV GIT_URL=https://github.com/ckan/ckan.git
-# CKAN version to build
-ENV GIT_BRANCH=ckan-2.9.3
-# Customize these on the .env file if needed
-ENV CKAN_SITE_URL=http://localhost:5000
+ENV SRC_DIR=${APP_DIR}/src
+ENV GIT_URL=https://github.com/kowh-ai/datapusher.git
+#ENV GIT_URL=https://github.com/keitaroinc/datapusher.git
+ENV GIT_BRANCH=master
+ENV JOB_CONFIG=${APP_DIR}/datapusher_settings.py
 ENV CKAN__PLUGINS image_view text_view recline_view datastore datapusher envvars
 
 WORKDIR ${APP_DIR}
 
-# Install necessary packages to run CKAN
-RUN apk add --no-cache tzdata \
-        git \
-        gettext \
-        postgresql-client \
+# --no-cache makes apk fetch the package index on the fly instead of relying on a stored one
+RUN apk upgrade --no-cache && \
+    apk add --no-cache \
         python3 \
-        apache2-utils \
-        libxml2 \
-        libxslt \
-        musl-dev \
-        uwsgi-http \
-        uwsgi-corerouter \
-        uwsgi-python3 \
-        py3-gevent \
-        uwsgi-gevent \
-        libmagic \
         curl \
-        sudo && \
-    # Packages to build CKAN requirements and plugins
-    apk add --no-cache --virtual .build-deps \
-        postgresql-dev \
         gcc \
         make \
         g++ \
         autoconf \
         automake \
-        libtool \
+        libtool \
+        git \
+        musl-dev \
         python3-dev \
-        py3-virtualenv \
-        libxml2-dev \
+        libffi-dev \
+        openssl-dev \
+        libxml2-dev \
         libxslt-dev \
-        linux-headers && \
-    # Create SRC_DIR
-    mkdir -p ${SRC_DIR}
+        rust \
+        cargo
+
+RUN apk add --no-cache \
+        uwsgi \
+        uwsgi-http \
+        uwsgi-corerouter \
+        uwsgi-python
+
+# Create the src directory
+RUN mkdir -p ${SRC_DIR}
 
 # Install pip
 RUN curl -o ${SRC_DIR}/get-pip.py https://bootstrap.pypa.io/get-pip.py && \
     python3 ${SRC_DIR}/get-pip.py
 
-# Set up Python3 virtual environment
-RUN cd ${APP_DIR} && \
-    python3 -m venv ${APP_DIR} && \
-    source ${APP_DIR}/bin/activate
+# Install datapusher
+RUN cd ${SRC_DIR} && \
+    git clone -b ${GIT_BRANCH} --depth=1 --single-branch ${GIT_URL} && \
+    cd datapusher && \
+    python3 setup.py install && \
+    pip3 install --no-cache-dir -r requirements.txt
 
-# Virtual environment binaries/scripts to be used first
-ENV PATH=${APP_DIR}/bin:${PATH}
-
-# Install CKAN, uwsgi plus extensions
-RUN pip3 install -e git+${GIT_URL}@${GIT_BRANCH}#egg=ckan && \
-    pip3 install uwsgi && \
-    cd ${SRC_DIR}/ckan && \
-    cp who.ini ${APP_DIR} && \
-    pip install --no-binary :all: -r requirements.txt && \
-    # Install CKAN envvars to support loading config from environment variables
-    pip3 install -e git+https://github.com/okfn/ckanext-envvars.git#egg=ckanext-envvars && \
-    # Install CKAN extensions
-    pip3 install -e 'git+https://github.com/DataShades/ckanext-xloader@py3#egg=ckanext-xloader' && \
-    pip3 install -r $CKAN_VENV/src/ckanext-xloader/requirements.txt && \
-    pip3 install -U requests[security] && \
-    pip3 install -e 'git+https://github.com/DataShades/ckanext-harvest.git@py3#egg=ckanext-harvest' && \
-    pip3 install -r $CKAN_VENV/src/ckanext-harvest/pip-requirements.txt && \
-    pip3 install -e 'git+https://github.com/DataShades/ckanext-syndicate@py3#egg=ckanext-syndicate' && \
-    pip3 install -r $CKAN_VENV/src/ckanext-syndicate/requirements.txt && \
-    pip3 install -e 'git+https://github.com/ckan/ckanext-scheming.git@master#egg=ckanext-scheming' && \
-    pip3 install -r $CKAN_VENV/src/ckanext-scheming/requirements.txt
-
-# Create and update CKAN config
-RUN ckan generate config ${CKAN_INI}
+RUN cp ${APP_DIR}/src/datapusher/deployment/*.* ${APP_DIR} && \
+    # Remove default values in ini file
+    sed -i '/http/d' ${APP_DIR}/datapusher-uwsgi.ini && \
+    sed -i '/wsgi-file/d' ${APP_DIR}/datapusher-uwsgi.ini && \
+    sed -i '/virtualenv/d' ${APP_DIR}/datapusher-uwsgi.ini
+    # Remove src files
+    #rm -rf ${APP_DIR}/src
 
 # Install and configure supervisor
 RUN pip3 install supervisor && \
     mkdir /etc/supervisord.d
 
 # Copy all setup files
-COPY setup ${APP_DIR}
-COPY setup/supervisor.worker.conf /etc/supervisord.d/worker.conf
+COPY setup/start_datapusher.sh ${APP_DIR}
+COPY setup/datapusher-uwsgi.ini ${APP_DIR}
+COPY setup/datapusher_settings.py ${APP_DIR}
+COPY setup/supervisor.uwsgi.conf /etc/supervisord.d/datapusher.uwsgi.conf
 COPY setup/supervisord.conf /etc/supervisord.conf
 
-# Create a local user and group to run the app
-RUN addgroup -g 92 -S ckan && \
-    adduser -u 92 -h /srv/app -H -D -S -G ckan ckan
+# Create a 'www-data' local user and group to run the app
+RUN addgroup -g 92 -S www-data && \
+    adduser -u 92 -h /srv/app -H -D -S -G www-data www-data
 
-# Create local storage folder
-RUN mkdir -p $CKAN_STORAGE_PATH && \
-    chown -R ckan:ckan $CKAN_STORAGE_PATH
+# Set timezone
+RUN echo "UTC" > /etc/timezone && \
+    # Change ownership to app user
+    chown -R www-data:www-data /srv/app
 
-# Create entrypoint directory for children image scripts
-ONBUILD RUN mkdir /docker-entrypoint.d
-
-RUN chown ckan -R /srv/app
-
-EXPOSE 5000
-
-HEALTHCHECK --interval=10s --timeout=5s --retries=5 CMD curl --fail http://localhost:5000/api/3/action/status_show || exit 1
-
-CMD ["/srv/app/start_ckan.sh"]
\ No newline at end of file
+EXPOSE 8800
+CMD ["/srv/app/start_datapusher.sh"]
+#CMD ["sh", "-c", \
+#  "uwsgi --plugins=http,python --http=0.0.0.0:8800 --socket=/tmp/uwsgi.sock --ini=`echo ${APP_DIR}`/datapusher-uwsgi.ini --wsgi-file=`echo ${APP_DIR}`/datapusher.wsgi"]
diff --git a/setup/datapusher-uwsgi.ini b/setup/datapusher-uwsgi.ini
new file mode 100644
index 0000000..4c8274a
--- /dev/null
+++ b/setup/datapusher-uwsgi.ini
@@ -0,0 +1,14 @@
+[uwsgi]
+http = 0.0.0.0:8800
+uid = www-data
+gid = www-data
+wsgi-file = /srv/app/datapusher.wsgi
+master = true
+socket = /tmp/uwsgi.sock
+plugins = http,python
+pidfile = /tmp/%n.pid
+harakiri = 50
+max-requests = 5000
+vacuum = true
+callable = application
+buffer-size = 32768
\ No newline at end of file
diff --git a/setup/datapusher_settings.py b/setup/datapusher_settings.py
new file mode 100644
index 0000000..d6ee97a
--- /dev/null
+++ b/setup/datapusher_settings.py
@@ -0,0 +1,51 @@
+import os
+import uuid
+
+
+def _env_flag(name, default=True):
+    """Read a boolean setting from the environment.
+
+    Environment values are always strings, so 'False' or '0' would be truthy
+    if used directly; only the listed "off" spellings turn a flag off.
+    """
+    value = os.environ.get(name)
+    if value is None:
+        return default
+    return value.strip().lower() not in ('0', 'false', 'no', 'off')
+
+
+DEBUG = False
+TESTING = False
+SECRET_KEY = str(uuid.uuid4())
+USERNAME = str(uuid.uuid4())
+PASSWORD = str(uuid.uuid4())
+
+NAME = 'datapusher'
+
+# Webserver host and port
+
+HOST = os.environ.get('DATAPUSHER_HOST', '0.0.0.0')
+# int() keeps PORT numeric whether it comes from the environment or the default
+PORT = int(os.environ.get('DATAPUSHER_PORT', '8800'))
+
+# Database
+
+SQLALCHEMY_DATABASE_URI = os.environ.get('DATAPUSHER_SQLALCHEMY_DATABASE_URI', 'sqlite:////tmp/job_store.db')
+
+# Download and streaming settings
+
+MAX_CONTENT_LENGTH = int(os.environ.get('DATAPUSHER_MAX_CONTENT_LENGTH', '1024000'))
+CHUNK_SIZE = int(os.environ.get('DATAPUSHER_CHUNK_SIZE', '16384'))
+CHUNK_INSERT_ROWS = int(os.environ.get('DATAPUSHER_CHUNK_INSERT_ROWS', '250'))
+DOWNLOAD_TIMEOUT = int(os.environ.get('DATAPUSHER_DOWNLOAD_TIMEOUT', '30'))
+
+# Verify SSL
+SSL_VERIFY = _env_flag('DATAPUSHER_SSL_VERIFY', True)
+
+# logging
+#LOG_FILE = '/tmp/ckan_service.log'
+STDERR = True
+
+# Rewrite resource URL's when ckan callback url base is used
+REWRITE_RESOURCES = _env_flag('DATAPUSHER_REWRITE_RESOURCES', True)
+REWRITE_URL = os.environ.get('DATAPUSHER_REWRITE_URL', 'http://ckan:5000/')
diff --git a/setup/start_datapusher.sh b/setup/start_datapusher.sh
new file mode 100755
index 0000000..3b78d19
--- /dev/null
+++ b/setup/start_datapusher.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+echo "[start_datapusher.sh] Starting supervisord."
+# Replace this shell with supervisord so it receives the container's signals directly
+exec supervisord --configuration /etc/supervisord.conf
diff --git a/setup/supervisor.uwsgi.conf b/setup/supervisor.uwsgi.conf
new file mode 100644
index 0000000..67a6c37
--- /dev/null
+++ b/setup/supervisor.uwsgi.conf
@@ -0,0 +1,12 @@
+[program:datapusher-uwsgi]
+command=/usr/sbin/uwsgi -i /srv/app/datapusher-uwsgi.ini
+priority=501
+autostart=true
+autorestart=true
+redirect_stderr=true
+stdout_logfile=/dev/stdout
+stdout_logfile_maxbytes=0
+stderr_logfile=/dev/stdout
+stderr_logfile_maxbytes=0
+user=www-data
+environment=HOME="/srv/app",USER="www-data"
\ No newline at end of file
diff --git a/setup/supervisord.conf b/setup/supervisord.conf
index 052dbc5..f21c89c 100644
--- a/setup/supervisord.conf
+++ b/setup/supervisord.conf
@@ -7,7 +7,7 @@ chown = nobody:nogroup
 logfile = /tmp/supervisord.log
 logfile_maxbytes = 50MB
 logfile_backups=10
-loglevel = info
+loglevel = trace
 pidfile = /tmp/supervisord.pid
 nodaemon = true
 umask = 022