Add harvest example

This commit is contained in:
Marko Bocevski 2020-09-11 13:33:41 +02:00
parent 9d42fbff44
commit a9d0fff9cb
5 changed files with 306 additions and 0 deletions

View File

@ -0,0 +1,29 @@
# Runtime configuration of CKAN enabled through ckanext-envvars
# Information about how it works: https://github.com/okfn/ckanext-envvars
# Note that variables here take presedence over build/up time variables in .env
# General Settings
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000
CKAN_MAX_UPLOAD_SIZE_MB=10
# CKAN Plugins
CKAN__PLUGINS=envvars image_view text_view recline_view datastore datapusher harvest ckan_harvester
# CKAN requires storage path to be set in order for filestore to be enabled
CKAN__STORAGE_PATH=/srv/app/data
CKAN__WEBASSETS__PATH=/srv/app/data/webassets
# SYSADMIN settings, a sysadmin user is created automatically with the below credentials
CKAN_SYSADMIN_NAME=sysadmin
CKAN_SYSADMIN_PASSWORD=password
CKAN_SYSADMIN_EMAIL=sysadmin@ckantest.com
# Email settings
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
CKAN_SMTP_PASSWORD=pass
CKAN_SMTP_MAIL_FROM=ckan@localhost
# Harvest settings
CKAN__HARVEST__MQ__TYPE=redis
CKAN__HARVEST__MQ__HOSTNAME=redis

29
examples/harvest/.env Normal file
View File

@ -0,0 +1,29 @@
# Variables in this file will be used as build arguments when running
# docker-compose build and docker-compose up
# Verify correct substitution with "docker-compose config"
# If variables are newly added or enabled, please delete and rebuild the images to pull in changes:
# docker-compose down -v
# docker-compose build
# docker-compose up -d
# Database
POSTGRES_PASSWORD=ckan
POSTGRES_PORT=5432
DATASTORE_READONLY_PASSWORD=datastore
# CKAN
CKAN_VERSION=2.9.0
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000
CKAN_MAX_UPLOAD_SIZE_MB=10
# Datapusher
DATAPUSHER_VERSION=0.0.17
DATAPUSHER_MAX_CONTENT_LENGTH=10485760
DATAPUSHER_CHUNK_SIZE=16384
DATAPUSHER_CHUNK_INSERT_ROWS=250
DATAPUSHER_DOWNLOAD_TIMEOUT=30
# Redis
REDIS_VERSION=6.0.7

View File

@ -0,0 +1,55 @@
###################
### Extensions ####
###################
FROM keitaro/ckan:2.9.0 as extbuild
MAINTAINER Keitaro Inc <info@keitaro.com>
# Locations and tags, please use specific tags or revisions
ENV HARVEST_GIT_URL=https://github.com/ckan/ckanext-harvest
ENV HARVEST_GIT_BRANCH=v1.3.1
# Switch to the root user
USER root
# Install necessary packages to build extensions
RUN apk add --no-cache \
gcc \
g++ \
libffi-dev \
openssl-dev \
python3-dev
# Fetch and build the custom CKAN extensions
RUN pip wheel --wheel-dir=/wheels git+${HARVEST_GIT_URL}@${HARVEST_GIT_BRANCH}#egg=ckanext-harvest
RUN pip wheel --wheel-dir=/wheels -r https://raw.githubusercontent.com/ckan/ckanext-harvest/${HARVEST_GIT_BRANCH}/pip-requirements.txt
RUN curl -o /wheels/harvest.txt https://raw.githubusercontent.com/ckan/ckanext-harvest/${HARVEST_GIT_BRANCH}/pip-requirements.txt
############
### MAIN ###
############
FROM keitaro/ckan:2.9.0
ENV CKAN__PLUGINS envvars image_view text_view recline_view datastore datapusher harvest ckan_harvester
# Switch to the root user
USER root
COPY --from=extbuild /wheels /srv/app/ext_wheels
# Install and enable the custom extensions
RUN pip install --no-index --find-links=/srv/app/ext_wheels ckanext-harvest && \
pip install --no-index --find-links=/srv/app/ext_wheels -r /srv/app/ext_wheels/harvest.txt && \
# Not working atm since ckan config tool tries to load config before executing config-tool, workaround
# ckan -c ${APP_DIR}/production.ini config-tool "ckan.plugins = ${CKAN__PLUGINS}" && \
sed -i "/ckan.plugins = envvars/c ckan.plugins = ${CKAN__PLUGINS}" ${APP_DIR}/production.ini && \
chown -R ckan:ckan /srv/app
# Remove wheels
RUN rm -rf /srv/app/ext_wheels
# Add harvest entrypoint script
COPY ./scripts/00_harvest.sh ${APP_DIR}/docker-entrypoint.d/00_harvest.sh
# Switch to the ckan user
USER ckan

View File

@ -0,0 +1,191 @@
# docker-compose build && docker-compose up -d
version: "3"
volumes:
ckan_data:
pg_data:
solr_data:
services:
ckan:
container_name: ckan
build:
context: .
networks:
- frontend
- backend
depends_on:
- db
ports:
- "0.0.0.0:${CKAN_PORT}:5000"
env_file:
- ./.ckan-env
environment:
- CKAN_SQLALCHEMY_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/ckan
- CKAN_DATASTORE_WRITE_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/datastore
- CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:${DATASTORE_READONLY_PASSWORD}@db/datastore
- CKAN_SOLR_URL=http://solr:8983/solr/ckan
- CKAN_REDIS_URL=redis://redis:6379/1
- CKAN_DATAPUSHER_URL=http://datapusher:8000
- CKAN_SITE_URL=${CKAN_SITE_URL}
- CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
volumes:
- ckan_data:/srv/app/data
ckan-harvest-gather:
container_name: ckan-harvest-gather
build:
context: .
command: ckan -c /srv/app/production.ini harvester gather_consumer
restart: on-failure
networks:
- frontend
- backend
depends_on:
- db
- ckan
env_file:
- ./.ckan-env
environment:
- CKAN_SQLALCHEMY_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/ckan
- CKAN_DATASTORE_WRITE_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/datastore
- CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:${DATASTORE_READONLY_PASSWORD}@db/datastore
- CKAN_SOLR_URL=http://solr:8983/solr/ckan
- CKAN_REDIS_URL=redis://redis:6379/1
- CKAN_DATAPUSHER_URL=http://datapusher:8000
- CKAN_SITE_URL=${CKAN_SITE_URL}
- CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
ckan-harvest-fetch:
container_name: ckan-harvest-fetch
build:
context: .
command: ckan -c /srv/app/production.ini harvester fetch_consumer
restart: on-failure
networks:
- frontend
- backend
depends_on:
- db
- ckan
env_file:
- ./.ckan-env
environment:
- CKAN_SQLALCHEMY_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/ckan
- CKAN_DATASTORE_WRITE_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/datastore
- CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:${DATASTORE_READONLY_PASSWORD}@db/datastore
- CKAN_SOLR_URL=http://solr:8983/solr/ckan
- CKAN_REDIS_URL=redis://redis:6379/1
- CKAN_DATAPUSHER_URL=http://datapusher:8000
- CKAN_SITE_URL=${CKAN_SITE_URL}
- CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
ckan-harvest-run:
container_name: ckan-harvest-run
build:
context: .
command: /bin/sh -c "while true; do sleep 90; ckan -c /srv/app/production.ini harvester run; done"
networks:
- frontend
- backend
depends_on:
- db
- ckan
env_file:
- ./.ckan-env
environment:
- CKAN_SQLALCHEMY_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/ckan
- CKAN_DATASTORE_WRITE_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/datastore
- CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:${DATASTORE_READONLY_PASSWORD}@db/datastore
- CKAN_SOLR_URL=http://solr:8983/solr/ckan
- CKAN_REDIS_URL=redis://redis:6379/1
- CKAN_DATAPUSHER_URL=http://datapusher:8000
- CKAN_SITE_URL=${CKAN_SITE_URL}
- CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
ckan-harvest-cleanlog:
container_name: ckan-harvest-cleanlog
build:
context: .
command: /bin/sh -c "while true; do sleep 150; ckan -c /srv/app/production.ini harvester clean_harvest_log; done"
networks:
- frontend
- backend
depends_on:
- db
- ckan
env_file:
- ./.ckan-env
environment:
- CKAN_SQLALCHEMY_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/ckan
- CKAN_DATASTORE_WRITE_URL=postgresql://ckan:${POSTGRES_PASSWORD}@db/datastore
- CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:${DATASTORE_READONLY_PASSWORD}@db/datastore
- CKAN_SOLR_URL=http://solr:8983/solr/ckan
- CKAN_REDIS_URL=redis://redis:6379/1
- CKAN_DATAPUSHER_URL=http://datapusher:8000
- CKAN_SITE_URL=${CKAN_SITE_URL}
- CKAN_MAX_UPLOAD_SIZE_MB=${CKAN_MAX_UPLOAD_SIZE_MB}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
datapusher:
container_name: datapusher
image: keitaro/ckan-datapusher:${DATAPUSHER_VERSION}
networks:
- frontend
- backend
ports:
- "8000:8000"
environment:
- DATAPUSHER_MAX_CONTENT_LENGTH=${DATAPUSHER_MAX_CONTENT_LENGTH}
- DATAPUSHER_CHUNK_SIZE=${DATAPUSHER_CHUNK_SIZE}
- DATAPUSHER_CHUNK_INSERT_ROWS=${DATAPUSHER_CHUNK_INSERT_ROWS}
- DATAPUSHER_DOWNLOAD_TIMEOUT=${DATAPUSHER_DOWNLOAD_TIMEOUT}
db:
container_name: db
build:
context: ../../compose
dockerfile: postgresql/Dockerfile
args:
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
networks:
- backend
environment:
- DS_RO_PASS=${DATASTORE_READONLY_PASSWORD}
- POSTGRES_PASSWORD=${POSTGRES_PASSWORD}
volumes:
- pg_data:/var/lib/postgresql/data
healthcheck:
test: ["CMD", "pg_isready", "-U", "ckan"]
solr:
container_name: solr
build:
context: ../../compose
dockerfile: solr/Dockerfile
args:
- CKAN_VERSION=${CKAN_VERSION}
networks:
- backend
volumes:
- solr_data:/opt/solr/server/solr/ckan/data
redis:
container_name: redis
image: redis:${REDIS_VERSION}
networks:
- backend
networks:
frontend:
backend:

View File

@ -0,0 +1,2 @@
#!/bin/sh
ckan -c /srv/app/production.ini harvester initdb