Merge branch 'master' into ckan2.10

This commit is contained in:
Kiril-Poposki1998 2023-12-19 11:10:46 +01:00
commit be071e36a7
32 changed files with 936 additions and 136 deletions

View File

@ -311,3 +311,49 @@ jobs:
ghcr.io/keitaroinc/datapusher:${{ steps.datapusher.outputs.IMAGE_TAG }}
cache-from: type=local,src=/tmp/.buildx-cache-datapusher
cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-datapusher
# Builds the psql-init image from ./images/psql-init and pushes it to both
# Docker Hub (keitaro/psql-init) and GHCR (ghcr.io/keitaroinc/psql-init).
build-psql-init:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v1
    - name: Login to DockerHub
      uses: docker/login-action@v1
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
    - name: Login to GitHub Container Registry
      uses: docker/login-action@v1
      with:
        registry: ghcr.io
        username: ${{ github.repository_owner }}
        password: ${{ secrets.CR_PAT }}
    - name: Cache Docker layers
      uses: actions/cache@v2
      with:
        path: /tmp/.buildx-cache-psql-init
        key: ${{ runner.os }}-buildx-psql-init-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-buildx-psql-init
    - name: Get docker tag for psql-init image
      id: psql-init
      # The tag is whatever follows '=' on the Dockerfile line matching IMAGE_TAG.
      # It is appended to $GITHUB_OUTPUT because the `::set-output` workflow
      # command is deprecated.
      run: |
        echo "IMAGE_TAG=$(awk -F '=' '/IMAGE_TAG/{print $2}' ./images/psql-init/Dockerfile)" >> "$GITHUB_OUTPUT"
    - name: Build and push psql-init
      uses: docker/build-push-action@v2
      with:
        context: ./images/psql-init
        file: ./images/psql-init/Dockerfile
        push: true
        tags: |
          keitaro/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }}
          ghcr.io/keitaroinc/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }}
        # Layer cache is read from and written back to the directory restored
        # by the "Cache Docker layers" step.
        cache-from: type=local,src=/tmp/.buildx-cache-psql-init
        cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-psql-init

View File

@ -233,3 +233,34 @@ jobs:
tags: keitaro/ckandatapusher:${{ steps.datapusher.outputs.IMAGE_TAG }}
cache-from: type=local,src=/tmp/.buildx-cache-datapusher
cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-datapusher
# Builds the psql-init image from ./images/psql-init without pushing it
# (push: false), so the Dockerfile is only verified to build.
build-psql-init:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v2
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v1
    - name: Cache Docker layers
      uses: actions/cache@v2
      with:
        path: /tmp/.buildx-cache-psql-init
        key: ${{ runner.os }}-buildx-psql-init-${{ github.sha }}
        restore-keys: |
          ${{ runner.os }}-buildx-psql-init
    - name: Get docker tag for psql-init image
      id: psql-init
      # The tag is whatever follows '=' on the Dockerfile line matching IMAGE_TAG.
      # It is appended to $GITHUB_OUTPUT because the `::set-output` workflow
      # command is deprecated.
      run: |
        echo "IMAGE_TAG=$(awk -F '=' '/IMAGE_TAG/{print $2}' ./images/psql-init/Dockerfile)" >> "$GITHUB_OUTPUT"
    - name: Build psql-init
      uses: docker/build-push-action@v2
      with:
        context: ./images/psql-init
        file: ./images/psql-init/Dockerfile
        push: false
        tags: keitaro/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }}
        # Layer cache is read from and written back to the directory restored
        # by the "Cache Docker layers" step.
        cache-from: type=local,src=/tmp/.buildx-cache-psql-init
        cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-psql-init

View File

@ -48,7 +48,7 @@ We recommend to use a multi-stage approach to extend the docker images that we p
###################
### Extensions ####
###################
FROM ghcr.io/keitaroinc/ckan:2.9.7 as extbuild
FROM ghcr.io/keitaroinc/ckan:2.9.9 as extbuild
# Switch to the root user
USER root
@ -64,7 +64,7 @@ RUN pip wheel --wheel-dir=/wheels git+https://github.com/acmecorp/ckanext-acme@0
############
### MAIN ###
############
FROM ghcr.io/keitaroinc/ckan:2.9.7
FROM ghcr.io/keitaroinc/ckan:2.9.9
# Add the custom extensions to the plugins list
ENV CKAN__PLUGINS envvars image_view text_view recline_view datastore datapusher acme
@ -94,9 +94,9 @@ You can add scripts to CKAN custom images and copy them to the *docker-afterinit
## Build
To build a CKAN image run:
```sh
docker build --tag ghcr.io/keitaroinc/ckan:2.9.7 images/ckan/2.9
docker build --tag ghcr.io/keitaroinc/ckan:2.9.9 images/ckan/2.9
```
The -tag ghcr.io/keitaroinc/ckan:2.9.7 flag sets the image name to ghcr.io/keitaroinc/ckan:2.9.7 and 'images/ckan/2.9' at the end tells docker build to use the context into the specified directory where the Dockerfile and related contents are.
The `--tag ghcr.io/keitaroinc/ckan:2.9.9` flag sets the image name to ghcr.io/keitaroinc/ckan:2.9.9, and `images/ckan/2.9` at the end tells docker build to use that directory, which contains the Dockerfile and related files, as the build context.
## Upload to DockerHub
>*It's recommended to upload built images to DockerHub*

View File

@ -0,0 +1,203 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
NB Please copy changes to this file into the multilingual schema:
ckanext/multilingual/solr/schema.xml
-->
<!-- We update the version when there is a backward-incompatible change to this
schema. We used to use the `version` attribute for this but this is an internal
attribute that should not be used so starting from CKAN 2.10 we use the `name`
attribute with the form `ckan-X.Y` -->
<schema name="ckan-2.10" version="1.6">
<types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
<fieldtype name="binary" class="solr.BinaryField"/>
<fieldType name="int" class="solr.IntPointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.FloatPointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="long" class="solr.LongPointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="double" class="solr.DoublePointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="pint" class="solr.IntPointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="pfloat" class="solr.FloatPointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="plong" class="solr.LongPointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="pdouble" class="solr.DoublePointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="date" class="solr.DatePointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="pdate" class="solr.DatePointField" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="pdates" class="solr.DatePointField" positionIncrementGap="0" multiValued="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>
<fieldType name="pints" class="solr.IntPointField" positionIncrementGap="0" multiValued="true"/>
<fieldType name="pfloats" class="solr.FloatPointField" positionIncrementGap="0" multiValued="true"/>
<fieldType name="plongs" class="solr.LongPointField" positionIncrementGap="0" multiValued="true"/>
<fieldType name="pdoubles" class="solr.DoublePointField" positionIncrementGap="0" multiValued="true"/>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
<filter class="solr.ASCIIFoldingFilterFactory"/>
</analyzer>
</fieldType>
<!-- A general unstemmed text field - good if one does not know the language of the field -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.FlattenGraphFilterFactory"/> <!-- required on index analyzers after graph filters -->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<fieldType name="text_ngram" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.NGramTokenizerFactory" minGramSize="2" maxGramSize="10"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
</types>
<fields>
<field name="index_id" type="string" indexed="true" stored="true" required="true" />
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="site_id" type="string" indexed="true" stored="true" required="true" />
<field name="title" type="text" indexed="true" stored="true" />
<field name="title_ngram" type="text_ngram" indexed="true" stored="true" />
<field name="entity_type" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="dataset_type" type="string" indexed="true" stored="true" />
<field name="state" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="name_ngram" type="text_ngram" indexed="true" stored="true" />
<field name="revision_id" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="version" type="string" indexed="true" stored="true" />
<field name="url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="ckan_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="download_url" type="string" indexed="true" stored="true" omitNorms="true" />
<field name="notes" type="text" indexed="true" stored="true"/>
<field name="author" type="text_general" indexed="true" stored="true" />
<field name="author_email" type="text_general" indexed="true" stored="true" />
<field name="maintainer" type="text_general" indexed="true" stored="true" />
<field name="maintainer_email" type="text_general" indexed="true" stored="true" />
<field name="license" type="string" indexed="true" stored="true" />
<field name="license_id" type="string" indexed="true" stored="true" />
<field name="tags" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="groups" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="organization" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="capacity" type="string" indexed="true" stored="true" multiValued="false"/>
<field name="permission_labels" type="string" indexed="true" stored="false" multiValued="true"/>
<field name="res_name" type="text_general" indexed="true" stored="true" multiValued="true" />
<field name="res_description" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="res_format" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_url" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="res_type" type="string" indexed="true" stored="true" multiValued="true"/>
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema) -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="urls" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="depends_on" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="dependency_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="derives_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="has_derivation" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="links_to" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="linked_from" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="child_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="parent_of" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="views_total" type="int" indexed="true" stored="false"/>
<field name="views_recent" type="int" indexed="true" stored="false"/>
<field name="resources_accessed_total" type="int" indexed="true" stored="false"/>
<field name="resources_accessed_recent" type="int" indexed="true" stored="false"/>
<field name="metadata_created" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="metadata_modified" type="date" indexed="true" stored="true" multiValued="false"/>
<field name="indexed_ts" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
<!-- title_string holds the title as a plain string (rather than the
text type). This allows us to sort on title_string -->
<field name="title_string" type="string" indexed="true" stored="false" />
<field name="data_dict" type="string" indexed="false" stored="true" />
<field name="validated_data_dict" type="string" indexed="false" stored="true" />
<field name="_version_" type="string" indexed="true" stored="true"/>
<dynamicField name="*_date" type="date" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="extras_*" type="text" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="res_extras_*" type="text" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="vocab_*" type="string" indexed="true" stored="true" multiValued="true"/>
<dynamicField name="*" type="string" indexed="true" stored="false"/>
</fields>
<uniqueKey>index_id</uniqueKey>
<copyField source="url" dest="urls"/>
<copyField source="title" dest="title_ngram"/>
<copyField source="name" dest="name_ngram"/>
<copyField source="ckan_url" dest="urls"/>
<copyField source="download_url" dest="urls"/>
<copyField source="res_url" dest="urls"/>
<copyField source="extras_*" dest="text"/>
<copyField source="res_extras_*" dest="text"/>
<copyField source="vocab_*" dest="text"/>
<copyField source="urls" dest="text"/>
<copyField source="name" dest="text"/>
<copyField source="title" dest="text"/>
<copyField source="text" dest="text"/>
<copyField source="license" dest="text"/>
<copyField source="notes" dest="text"/>
<copyField source="tags" dest="text"/>
<copyField source="groups" dest="text"/>
<copyField source="organization" dest="text"/>
<copyField source="res_name" dest="text"/>
<copyField source="res_description" dest="text"/>
<copyField source="maintainer" dest="text"/>
<copyField source="author" dest="text"/>
</schema>

View File

@ -36,5 +36,5 @@ CKAN__DATAPUSHER__URL=http://datapusher:8000
CKAN__DATAPUSHER__CALLBACK_URL_BASE=http://ckan:5000/
# Solr configuration
CKAN_VERSION=2.9.7
CKAN_VERSION=2.9.9
CKAN_CORE_NAME=ckan

View File

@ -12,7 +12,7 @@ POSTGRES_PORT=5432
DATASTORE_READONLY_PASSWORD=datastore
# CKAN
CKAN_VERSION=2.9.7
CKAN_VERSION=2.9.9
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000

View File

@ -4,7 +4,7 @@
# Arguments are supplied via environment variables: CKAN_CORE_NAME CKAN_VERSION
# Example:
# CKAN_CORE_NAME=ckan
# CKAN_VERSION=2.9.7
# CKAN_VERSION=2.9.9
set -e

View File

@ -33,5 +33,5 @@ CKAN__HARVEST__MQ__TYPE=redis
CKAN__HARVEST__MQ__HOSTNAME=redis
# Solr configuration
CKAN_VERSION=2.9.7
CKAN_VERSION=2.9.9
CKAN_CORE_NAME=ckan

View File

@ -12,7 +12,7 @@ POSTGRES_PORT=5432
DATASTORE_READONLY_PASSWORD=datastore
# CKAN
CKAN_VERSION=2.9.7
CKAN_VERSION=2.9.9
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000

View File

@ -1,7 +1,7 @@
###################
### Extensions ####
###################
FROM ghcr.io/keitaroinc/ckan:2.9.7 as extbuild
FROM ghcr.io/keitaroinc/ckan:2.9.9 as extbuild
# Locations and tags, please use specific tags or revisions
ENV HARVEST_GIT_URL=https://github.com/ckan/ckanext-harvest
@ -30,7 +30,7 @@ USER ckan
############
### MAIN ###
############
FROM ghcr.io/keitaroinc/ckan:2.9.7
FROM ghcr.io/keitaroinc/ckan:2.9.9
LABEL maintainer="Keitaro Inc <info@keitaro.com>"

View File

@ -38,5 +38,5 @@ CKANEXT__S3FILESTORE__REGION_NAME=us-east-1
CKANEXT__S3FILESTORE__SIGNATURE_VERSION=s3v4
# Solr configuration
CKAN_VERSION=2.9.7
CKAN_VERSION=2.9.9
CKAN_CORE_NAME=ckan

View File

@ -12,7 +12,7 @@ POSTGRES_PORT=5432
DATASTORE_READONLY_PASSWORD=datastore
# CKAN
CKAN_VERSION=2.9.7
CKAN_VERSION=2.9.9
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000

View File

@ -1,7 +1,7 @@
###################
### Extensions ####
###################
FROM ghcr.io/keitaroinc/ckan:2.9.7 as extbuild
FROM ghcr.io/keitaroinc/ckan:2.9.9 as extbuild
# Locations and tags, please use specific tags or revisions
ENV S3FILESTORE_GIT_URL=https://github.com/keitaroinc/ckanext-s3filestore
@ -20,7 +20,7 @@ USER ckan
############
### MAIN ###
############
FROM ghcr.io/keitaroinc/ckan:2.9.7
FROM ghcr.io/keitaroinc/ckan:2.9.9
LABEL maintainer="Keitaro Inc <info@keitaro.com>"

View File

@ -170,6 +170,9 @@ RUN pip install -e /srv/app/src/ckan && \
# Remove wheels
RUN rm -rf /srv/app/wheels /srv/app/ext_wheels
# Install python2 secrets for generating sessions
RUN pip install python2-secrets
# Copy necessary scripts
COPY setup/app ${APP_DIR}

View File

@ -205,6 +205,7 @@ RUN pip install -e /srv/app/src/ckan && \
# Create and update CKAN config
# Generate CKAN config
paster --plugin=ckan make-config ckan ${APP_DIR}/production.ini && \
paster --plugin=ckan config-tool ${APP_DIR}/production.ini "beaker.session.secret = " && \
paster --plugin=ckan config-tool ${APP_DIR}/production.ini "ckan.plugins = ${CKAN__PLUGINS}" && \
# Set the default level for extensions to INFO
paster --plugin=ckan config-tool ${APP_DIR}/production.ini -s logger_ckanext -e level=INFO && \

View File

@ -12,6 +12,14 @@ then
done
fi
# Add session secret from chart.
# When BEAKER_SESSION_SECRET is unset or empty, production.ini is left alone so
# the check that follows can generate a secret; otherwise the supplied value is
# written to the ini file.
# The variable is tested with a single quoted -z: `-v` expects a variable
# *name*, so passing the expanded secret to it tested an unrelated variable.
if [[ -z "${BEAKER_SESSION_SECRET:-}" ]]; then
  echo "Not all environment variables are set. Generating sessions..."
else
  echo "Setting session secrets from environment variables"
  paster --plugin=ckan config-tool "$APP_DIR/production.ini" "beaker.session.secret=$BEAKER_SESSION_SECRET"
fi
if grep -E "beaker.session.secret ?= ?$" $APP_DIR/production.ini
then
echo "Setting beaker.session.secret in ini file"

View File

@ -163,6 +163,9 @@ RUN pip install -e /srv/app/src/ckan && \
# Remove wheels
RUN rm -rf /srv/app/wheels /srv/app/ext_wheels
# Install python2 secrets for generating sessions
RUN pip install python2-secrets
# Copy necessary scripts
COPY setup/app ${APP_DIR}

View File

@ -191,6 +191,7 @@ RUN pip install -e /srv/app/src/ckan && \
# Create and update CKAN config
# Generate CKAN config
paster --plugin=ckan make-config ckan ${APP_DIR}/production.ini && \
paster --plugin=ckan config-tool ${APP_DIR}/production.ini "beaker.session.secret = " && \
paster --plugin=ckan config-tool ${APP_DIR}/production.ini "ckan.plugins = ${CKAN__PLUGINS}" && \
# Set the default level for extensions to INFO
paster --plugin=ckan config-tool ${APP_DIR}/production.ini -s logger_ckanext -e level=INFO && \

View File

@ -12,6 +12,14 @@ then
done
fi
# Add session secret from chart.
# When BEAKER_SESSION_SECRET is unset or empty, production.ini is left alone so
# the check that follows can generate a secret; otherwise the supplied value is
# written to the ini file.
# The variable is tested with a single quoted -z: `-v` expects a variable
# *name*, so passing the expanded secret to it tested an unrelated variable.
if [[ -z "${BEAKER_SESSION_SECRET:-}" ]]; then
  echo "Not all environment variables are set. Generating sessions..."
else
  echo "Setting session secrets from environment variables"
  paster --plugin=ckan config-tool "$APP_DIR/production.ini" "beaker.session.secret=$BEAKER_SESSION_SECRET"
fi
if grep -E "beaker.session.secret ?= ?$" $APP_DIR/production.ini
then
echo "Setting beaker.session.secret in ini file"

View File

@ -4,11 +4,11 @@
FROM alpine:3.13.7 as ckanbuild
# Used by Github Actions to tag the image with
ENV IMAGE_TAG=2.9.7
ENV IMAGE_TAG=2.9.9
# Set CKAN version to build
ENV GIT_URL=https://github.com/ckan/ckan.git
ENV GIT_BRANCH=ckan-2.9.7
ENV GIT_BRANCH=ckan-2.9.9
# Set src dirs
ENV SRC_DIR=/srv/app/src
@ -57,12 +57,17 @@ RUN pip install -e git+${GIT_URL}@${GIT_BRANCH}#egg=ckan
COPY ./patches ${SRC_DIR}/patches
COPY ./scripts/apply_ckan_patches.sh ${SRC_DIR}/apply_ckan_patches.sh
# Apply patches
RUN ${SRC_DIR}/apply_ckan_patches.sh
RUN cd ${SRC_DIR} && ls -lah ${SRC_DIR} && ash ${SRC_DIR}/apply_ckan_patches.sh
RUN rm -rf /srv/app/src/ckan/.git
# Create a constraint file that limits the Cython version to a compatible one, see https://github.com/yaml/pyyaml/issues/736
RUN echo 'Cython < 3.0' > /tmp/constraint.txt
RUN PIP_CONSTRAINT=/tmp/constraint.txt pip wheel --wheel-dir=/wheels PyYAML==5.4.1
# RUN pip-compile ckan/requirements.in
RUN pip wheel --wheel-dir=/wheels -r ckan/requirements.txt
RUN pip wheel --wheel-dir=/wheels uWSGI==2.0.20 gevent==21.12.0 greenlet==1.1.3
###########################
### Default-Extensions ####
###########################
@ -174,6 +179,7 @@ RUN pip install -e /srv/app/src/ckan && \
# Change ownership to app user
chown -R ckan:ckan /srv/app
RUN pip install sqlalchemy==1.3.19
# Remove wheels
RUN rm -rf /srv/app/wheels /srv/app/ext_wheels

View File

@ -4,11 +4,11 @@
FROM ubuntu:focal-20210827 as ckanbuild
# Used by Github Actions to tag the image with
ENV IMAGE_TAG=2.9.7-focal
ENV IMAGE_TAG=2.9.9-focal
# Set CKAN version to build
ENV GIT_URL=https://github.com/ckan/ckan.git
ENV GIT_BRANCH=ckan-2.9.7
ENV GIT_BRANCH=ckan-2.9.9
# Set timezone
ENV TZ=UTC
@ -210,6 +210,7 @@ RUN pip install -e /srv/app/src/ckan && \
# Generate CKAN config
ckan generate config ${APP_DIR}/production.ini && \
# Configure plugins
ckan config-tool ${APP_DIR}/production.ini "beaker.session.secret = " && \
ckan config-tool ${APP_DIR}/production.ini "ckan.plugins = ${CKAN__PLUGINS}" && \
# Create the data directory
mkdir ${DATA_DIR} && \

View File

@ -1,11 +1,11 @@
--- ckan/ckan/model/__init__.py 2021-02-16 14:47:06.168327441 +0100
+++ ckan/ckan/model/__init__.py 2021-02-16 14:48:00.740780218 +0100
@@ -266,7 +266,7 @@
--- ckan/ckan/model/__init__.py
+++ ckan/ckan/model/__init__.py
@@ -276,7 +276,7 @@ class Repository():
self.reset_alembic_output()
alembic_config = AlembicConfig(self._alembic_ini)
alembic_config.set_main_option(
- "sqlalchemy.url", str(self.metadata.bind.url)
+ "sqlalchemy.url", str(self.metadata.bind.url).replace('%', '%%')
- "sqlalchemy.url", config.get("sqlalchemy.url")
+ "sqlalchemy.url", config.get("sqlalchemy.url").replace('%', '%%')
)
try:
sqlalchemy_migrate_version = self.metadata.bind.execute(

View File

@ -1,6 +1,6 @@
--- ckan/ckanext/datastore/backend/postgres.py 2021-02-18 11:01:56.692267462 +0100
+++ ckan/ckanext/datastore/backend/postgres-patch.py 2021-02-18 13:45:16.033193435 +0100
@@ -1690,7 +1690,7 @@
--- ckan/ckanext/datastore/backend/postgres.py
+++ ckan/ckanext/datastore/backend/postgres.py
@@ -1809,7 +1809,7 @@ class DatastorePostgresqlBackend(DatastoreBackend):
read only user.
'''
write_connection = self._get_write_engine().connect()

View File

@ -1,11 +0,0 @@
--- ckan/ckan/logic/action/update.py 2021-02-17 16:46:55.673578728 +0100
+++ ckan/ckan/logic/action/update-edit.py 2021-02-17 16:47:28.905879170 +0100
@@ -929,7 +929,7 @@
'''
model = context['model']
- session = model.Session
+ session = model.meta.create_local_session()
context['session'] = session
user = context['user']

View File

@ -0,0 +1,39 @@
# uWSGI configuration with HTTP basic auth in front of the application.
# NOTE(review): presumably this is the file the start script loads as
# /srv/app/basic-auth-uwsgi.conf - confirm against the image's COPY layout.
# The number of worker processes is not set here; the start script passes it
# on the command line with -p.
[uwsgi]
# Require basic auth (realm "Restricted", credentials in /srv/app/.htpasswd)
# for every path that does not start with /api.
route = ^(?!/api).*$ basicauth:Restricted,/srv/app/.htpasswd
socket = /tmp/uwsgi.sock
uid = ckan
gid = ckan
http = :5000
master = true
enable-threads = true
lazy-apps = true
gevent-early-monkey-patch = true
vacuum = true
single-interpreter= true
die-on-term = true
need-app = true
auto-procname = true
# Application entry point and the ini file used for its configuration/logging.
wsgi-file = /srv/app/wsgi.py
module = wsgi:application
gevent = 2000
callable = application
paste = config:/srv/app/production.ini
paste-logger = /srv/app/production.ini
post-buffering = 1
buffer-size= 12288
max-requests = 3000
max-worker-lifetime = 3600
reload-on-rss = 4096
worker-reload-mercy = 60
socket-timeout = 300
queue = 1000
queue-blocksize = 204800
static-gzip-all = true
listen = 1000
http-timeout = 1000
http-headers-timeout = 1000
http-connect-timeout = 1000

View File

@ -1,12 +1,9 @@
"""
Copyright (c) 2016 Keitaro AB
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ -14,12 +11,13 @@ See the License for the specific language governing permissions and
limitations under the License.
"""
from multiprocessing import connection
import os
import sys
import subprocess
import psycopg2
from sqlalchemy.engine.url import make_url
import urllib.request, urllib.error, urllib.parse
import urllib.request, urllib.error, urllib.parse, base64
import re
import json
@ -70,10 +68,20 @@ def check_solr_connection(retry=None):
sys.exit(1)
url = os.environ.get('CKAN_SOLR_URL', '')
username = os.environ.get('CKAN_SOLR_USER', '')
password = os.environ.get('CKAN_SOLR_PASSWORD', '')
search_url = '{url}/schema/name?wt=json'.format(url=url)
try:
if not username:
connection = urllib.request.urlopen(search_url)
else:
request = urllib.request.Request(search_url)
base64string = base64.b64encode(bytes('%s:%s' % (username, password),'ascii'))
request.add_header("Authorization", "Basic %s" % base64string.decode('utf-8'))
connection = urllib.request.urlopen(request)
except urllib.error.URLError as e:
print('[prerun] Unable to connect to solr...try again in a while.')
import time

View File

@ -12,17 +12,26 @@ then
done
fi
# Add session secret from chart.
# The secrets from the environment are only applied when all three of
# BEAKER_SESSION_SECRET, JWT_ENCODE_SECRET and JWT_DECODE_SECRET are set and
# non-empty; otherwise production.ini is left alone so the check that follows
# can generate them.
# Each variable is tested with a single quoted -z: `-v` expects a variable
# *name*, so passing the expanded secret to it tested an unrelated variable.
if [[ -z "${BEAKER_SESSION_SECRET:-}" || -z "${JWT_ENCODE_SECRET:-}" || -z "${JWT_DECODE_SECRET:-}" ]]; then
  echo "Not all environment variables are set. Generating sessions..."
else
  echo "Setting session secrets from environment variables"
  ckan config-tool "$APP_DIR/production.ini" "beaker.session.secret=$BEAKER_SESSION_SECRET"
  ckan config-tool "$APP_DIR/production.ini" "api_token.jwt.encode.secret=$JWT_ENCODE_SECRET"
  ckan config-tool "$APP_DIR/production.ini" "api_token.jwt.decode.secret=$JWT_DECODE_SECRET"
fi
if grep -E "beaker.session.secret ?= ?$" $APP_DIR/production.ini
then
echo "Setting secrets in ini file"
ckan config-tool $APP_DIR/production.ini "beaker.session.secret=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
ckan config-tool $APP_DIR/production.ini "api_token.jwt.encode.secret=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')"
ckan config-tool $APP_DIR/production.ini "api_token.jwt.decode.secret=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')"
ckan config-tool $APP_DIR/production.ini "WTF_CSRF_SECRET_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe())')"
JWT_SECRET=$(python3 -c 'import secrets; print("string:" + secrets.token_urlsafe())')
ckan config-tool $APP_DIR/production.ini "api_token.jwt.encode.secret=$JWT_SECRET"
ckan config-tool $APP_DIR/production.ini "api_token.jwt.decode.secret=$JWT_SECRET"
fi
echo "Starting UWSGI with '${UWSGI_PROC_NO:-2}' workers"
UWSGI_OPTS="--socket /tmp/uwsgi.sock --uid ckan --gid ckan --http :5000 --master --enable-threads --wsgi-file /srv/app/wsgi.py --module wsgi:application --lazy-apps --gevent 2000 -p ${UWSGI_PROC_NO:-2} -L --gevent-early-monkey-patch --vacuum --harakiri 50 --callable application"
# Run the prerun script to init CKAN and create the default admin user
python prerun.py || { echo '[CKAN prerun] FAILED. Exiting...' ; exit 1; }
@ -52,14 +61,15 @@ then
# Generate htpasswd file for basicauth
htpasswd -d -b -c /srv/app/.htpasswd $HTPASSWD_USER $HTPASSWD_PASSWORD
# Start uwsgi with basicauth
uwsgi --ini /srv/app/uwsgi.conf --pcre-jit $UWSGI_OPTS
uwsgi --ini /srv/app/basic-auth-uwsgi.conf -p ${UWSGI_PROC_NO:-2} --pcre-jit
else
echo "Missing HTPASSWD_USER or HTPASSWD_PASSWORD environment variables. Exiting..."
exit 1
fi
else
# Start uwsgi
uwsgi $UWSGI_OPTS
echo "Starting UWSGI with '${UWSGI_PROC_NO:-2}' workers"
uwsgi --ini /srv/app/uwsgi.conf -p ${UWSGI_PROC_NO:-2}
fi
else
echo "[prerun] failed...not starting CKAN."

View File

@ -1,2 +1,37 @@
[uwsgi]
route = ^(?!/api).*$ basicauth:Restricted,/srv/app/.htpasswd
socket = /tmp/uwsgi.sock
uid = ckan
gid = ckan
http = :5000
master = true
enable-threads = true
lazy-apps = true
gevent-early-monkey-patch = true
vacuum = true
single-interpreter= true
die-on-term = true
need-app = true
auto-procname = true
wsgi-file = /srv/app/wsgi.py
module = wsgi:application
gevent = 2000
callable = application
paste = config:/srv/app/production.ini
paste-logger = /srv/app/production.ini
post-buffering = 1
buffer-size= 12288
max-requests = 3000
max-worker-lifetime = 3600
reload-on-rss = 4096
worker-reload-mercy = 60
socket-timeout = 300
queue = 1000
queue-blocksize = 204800
static-gzip-all = true
listen = 1000
http-timeout = 1000
http-headers-timeout = 1000
http-connect-timeout = 1000

View File

@ -0,0 +1,21 @@
# Image that runs the psql-init.py database initialisation script.
# Start with a lightweight base image
FROM python:3.9-alpine
# Used by Github Actions to tag the image with.
# The workflows read this value with awk (the field after '=' on lines matching
# IMAGE_TAG), so keep it as a single KEY=VALUE line and do not mention
# IMAGE_TAG on any other line of this file.
ENV IMAGE_TAG=0.0.1
# Set the working directory in the container
WORKDIR /srv
# Copy the requirements file to the container
COPY requirements.txt .
# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy the contents of psql-init/ into the working directory (/srv)
COPY psql-init/ .
# Run the init script when the container starts
CMD ["python", "/srv/psql-init.py"]

View File

@ -0,0 +1,277 @@
"""
Copyright (c) 2020 Keitaro AB
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
import subprocess
import re
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from psycopg2.extensions import AsIs
from sqlalchemy.engine.url import make_url
ckan_conn_str = os.environ.get('CKAN_SQLALCHEMY_URL', '')
datastorerw_conn_str = os.environ.get('CKAN_DATASTORE_WRITE_URL', '')
datastorero_conn_str = os.environ.get('CKAN_DATASTORE_READ_URL', '')
master_user = os.environ.get('PSQL_MASTER', '')
master_passwd = os.environ.get('PSQL_PASSWD', '')
master_database = os.environ.get('PSQL_DB', '')
class DB_Params:
    """Connection details extracted from a database URL.

    The URL is parsed with ``make_url`` and its components are exposed as:

    * ``db_user``   - username part of the URL
    * ``db_passwd`` - password part of the URL
    * ``db_host``   - host part of the URL
    * ``db_name``   - database part of the URL
    """

    def __init__(self, conn_str):
        # Parse once and read every component from the same URL object.
        parsed_url = make_url(conn_str)
        self.db_user = parsed_url.username
        self.db_passwd = parsed_url.password
        self.db_host = parsed_url.host
        self.db_name = parsed_url.database
def check_db_connection(db_params, retry=None):
    """Block until the PostgreSQL server accepts a master-user connection.

    A connection to ``master_database`` on ``db_params.db_host`` is attempted
    with the master credentials. After each failed attempt the function
    sleeps 30 seconds and tries again.

    Args:
        db_params: object whose ``db_host`` attribute names the server.
        retry: number of connection attempts allowed; ``None`` means 20
            (about ten minutes in total). Zero or a negative value gives up
            immediately.

    Returns:
        None once a connection has been opened and closed successfully.

    Raises:
        SystemExit: with status 1 when the attempts are exhausted.
    """
    import time

    attempts_left = 20 if retry is None else retry
    # A loop instead of recursion: the retry count no longer grows the call
    # stack, and a negative budget terminates instead of recursing forever.
    while True:
        print('Checking whether database is up...')
        if attempts_left <= 0:
            print('Giving up...')
            sys.exit(1)
        try:
            con = psycopg2.connect(user=master_user,
                                   host=db_params.db_host,
                                   password=master_passwd,
                                   database=master_database)
        except psycopg2.Error as e:
            print((str(e)))
            print('Unable to connect to the database...try again in a while.')
            time.sleep(30)
            attempts_left -= 1
        else:
            con.close()
            return
def create_user(db_params):
    """Create the login role described by ``db_params``.

    Connects to ``master_database`` as the master user and issues a
    ``CREATE ROLE`` for the part of ``db_params.db_user`` before the first
    ``@``, with ``db_params.db_passwd`` as its password. Any failure is
    printed with an ``ERROR DB:`` prefix and swallowed, so the function is
    best-effort and never raises.

    Args:
        db_params: object providing ``db_user``, ``db_passwd`` and ``db_host``.

    Returns:
        None.
    """
    con = None
    cur = None
    try:
        con = psycopg2.connect(user=master_user,
                               host=db_params.db_host,
                               password=master_passwd,
                               database=master_database)
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        cur = con.cursor()
        # Only the part before "@" is the role name inside PostgreSQL.
        role_name = db_params.db_user.split("@")[0]
        print("Creating user " + role_name)
        # The role name is spliced in verbatim (AsIs) between double quotes;
        # the password is passed as a bound parameter.
        cur.execute('CREATE ROLE "%s" ' +
                    'WITH ' +
                    'LOGIN NOSUPERUSER INHERIT ' +
                    'CREATEDB NOCREATEROLE NOREPLICATION ' +
                    'PASSWORD %s',
                    (AsIs(role_name),
                     db_params.db_passwd,))
    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so this single
        # clause covers everything the original tuple did.
        print("ERROR DB: ", error)
    finally:
        # Either handle may be missing when the failure happened before it was
        # created; closing unconditionally would raise and hide the real error.
        if cur is not None:
            cur.close()
        if con is not None:
            con.close()
def create_db(db_params):
    """Create the database named in ``db_params`` and hand it to its role.

    Connected to ``master_database`` as the master user, this:

    1. grants the target role to the master role,
    2. creates ``db_params.db_name`` owned by the target role,
    3. grants the target role all privileges on that database,
    4. grants ``pg_monitor`` to the target role when the ``pg_buffercache``
       extension is reported as installed in the new database.

    Any failure is printed with an ``ERROR DB:`` prefix and swallowed, so the
    remaining steps are skipped but the function never raises.

    Args:
        db_params: object providing ``db_user``, ``db_host`` and ``db_name``.

    Returns:
        None.
    """
    con = None
    cur = None
    try:
        con = psycopg2.connect(user=master_user,
                               host=db_params.db_host,
                               password=master_passwd,
                               database=master_database)
        # CREATE DATABASE cannot run inside a transaction block.
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        cur = con.cursor()
        # Only the part before "@" is the role name inside PostgreSQL.
        owner = db_params.db_user.split("@")[0]
        # NOTE(review): presumably required so a master account that is not a
        # true superuser may create a database owned by another role -- confirm.
        cur.execute('GRANT "' + owner + '" TO "' +
                    master_user.split("@")[0] + '"')
        print("Creating database " + db_params.db_name + " with owner " +
              owner)
        cur.execute('CREATE DATABASE ' + db_params.db_name + ' OWNER "' +
                    owner + '"')
        cur.execute('GRANT ALL PRIVILEGES ON DATABASE ' +
                    db_params.db_name + ' TO "' + owner + '"')
        if is_pg_buffercache_enabled(db_params) >= 1:
            # FIXME: This is a known issue with pg_buffercache access
            # For more info check this thread:
            # https://www.postgresql.org/message-id/21009351582737086%40iva6-22e79380f52c.qloud-c.yandex.net
            print("Granting privileges on pg_monitor to " + owner)
            cur.execute('GRANT "pg_monitor" TO "' + owner + '"')
    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so this single
        # clause covers everything the original tuple did.
        print("ERROR DB: ", error)
    finally:
        # Either handle may be missing when the failure happened before it was
        # created; closing unconditionally would raise and hide the real error.
        if cur is not None:
            cur.close()
        if con is not None:
            con.close()
def is_pg_buffercache_enabled(db_params):
    """Count ``pg_buffercache`` entries in ``pg_extension`` of a database.

    Connects to ``db_params.db_name`` on ``db_params.db_host`` as the master
    user and runs a ``count(*)`` over ``pg_extension`` filtered on the
    extension name.

    Args:
        db_params: object providing ``db_host`` and ``db_name``.

    Returns:
        The count as an integer (callers test ``>= 1``). When the connection
        or the query fails the error is printed with an ``ERROR DB:`` prefix
        and 0 is returned.
    """
    con = None
    cur = None
    result = None
    try:
        con = psycopg2.connect(user=master_user,
                               host=db_params.db_host,
                               password=master_passwd,
                               database=db_params.db_name)
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        cur = con.cursor()
        cur.execute("SELECT count(*) FROM pg_extension " +
                    "WHERE extname = 'pg_buffercache'")
        result = cur.fetchone()
    except Exception as error:
        # psycopg2.DatabaseError is a subclass of Exception, so this single
        # clause covers everything the original tuple did.
        print("ERROR DB: ", error)
    finally:
        # Either handle may be missing when the failure happened before it was
        # created; closing unconditionally would raise and hide the real error.
        if cur is not None:
            cur.close()
        if con is not None:
            con.close()
    # No row means the lookup failed; report "not enabled" instead of
    # subscripting None.
    return result[0] if result else 0
def set_datastore_permissions(datastore_rw_params, datastore_ro_params, sql):
    """Grant the read-only role access to the datastore and run ``sql``.

    Connected to the datastore database as the master user, this grants
    CONNECT on that database to the read-only role, grants ``pg_monitor`` to
    it when ``pg_buffercache`` is reported as installed, and finally executes
    the supplied ``sql`` text verbatim. Any failure is printed with an
    ``ERROR DB:`` prefix and swallowed.

    Args:
        datastore_rw_params: object providing ``db_host`` and ``db_name`` of
            the datastore database.
        datastore_ro_params: object providing ``db_user`` of the read-only
            role.
        sql: SQL text to execute after the grants.

    Returns:
        None.
    """
    con = None
    cur = None
    try:
        con = psycopg2.connect(user=master_user,
                               host=datastore_rw_params.db_host,
                               password=master_passwd,
                               database=datastore_rw_params.db_name)
        con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
        cur = con.cursor()
        # Only the part before "@" is the role name inside PostgreSQL. It is
        # double-quoted below, matching how create_user/create_db refer to it.
        ro_role = datastore_ro_params.db_user.split("@")[0]
        cur.execute('GRANT CONNECT ON DATABASE ' +
                    datastore_rw_params.db_name +
                    ' TO "' + ro_role + '"')
        if is_pg_buffercache_enabled(datastore_rw_params) >= 1:
            print("Granting privileges on pg_monitor to " + ro_role)
            # pg_monitor is a role, not a table: grant membership in it, the
            # same way create_db does.
            cur.execute('GRANT "pg_monitor" TO "' + ro_role + '"')
        print("Setting datastore permissions\n")
        print(sql)
        cur.execute(sql)
        print("Datastore permissions applied.")
    except Exception as error:
        print("ERROR DB: ", error)
    finally:
        # Either handle may be missing when the failure happened before it was
        # created; closing unconditionally would raise and hide the real error.
        if cur is not None:
            cur.close()
        if con is not None:
            con.close()
# Nothing can be provisioned without the complete set of master credentials.
if '' in (master_user, master_passwd, master_database):
    print("No master postgresql user provided.")
    print("Cannot initialize default CKAN db resources. Exiting!")
    sys.exit(1)

print("Master DB: " + master_database + " Master User: " + master_user)

ckan_db = DB_Params(ckan_conn_str)
datastorerw_db = DB_Params(datastorerw_conn_str)
datastorero_db = DB_Params(datastorero_conn_str)

# Wait for the server to accept connections; with the default retry budget
# (20 attempts, 30 seconds apart) this gives up after roughly 10 minutes.
check_db_connection(ckan_db)

# Roles first: CKAN, datastore read/write, datastore read-only.
for role_params in (ckan_db, datastorerw_db, datastorero_db):
    try:
        create_user(role_params)
    except(Exception, psycopg2.DatabaseError) as error:
        print("ERROR DB: ", error)

# Then the two databases: CKAN and the datastore.
for owner_params in (ckan_db, datastorerw_db):
    try:
        create_db(owner_params)
    except(Exception, psycopg2.DatabaseError) as error:
        print("ERROR DB: ", error)
def execute_sql_script(ckan_dbp, datastorero_dbp, datastorerw_dbp, script_path):
    """Fill in and run a templated SQL script against the datastore database.

    The file at ``script_path`` is read, its ``{datastoredb}``,
    ``{readuser}``, ``{writeuser}``, ``{mainuser}`` and ``{maindb}``
    placeholders are replaced by plain text substitution, and the result is
    executed as the master user on a connection to the datastore database,
    then committed.

    Args:
        ckan_dbp: parameters of the main CKAN database (``db_user``,
            ``db_name``).
        datastorero_dbp: parameters of the datastore read-only role.
        datastorerw_dbp: parameters of the datastore read/write role and
            database (``db_user``, ``db_name``, ``db_host``).
        script_path: path of the SQL template file.

    Returns:
        None. A ``psycopg2.Error`` raised while running the script is printed
        and swallowed.

    Raises:
        psycopg2.Error: if the initial connection cannot be opened.
        OSError: if the script file cannot be read.
    """
    # Connect to the database
    conn = psycopg2.connect(
        user=master_user,
        host=datastorerw_dbp.db_host,
        password=master_passwd,
        database=datastorerw_dbp.db_name
    )
    cur = None
    try:
        # Create a cursor
        cur = conn.cursor()

        with open(script_path, 'r') as f:
            sql_script = f.read()

        # Replace placeholders with actual values. Role names are cut at the
        # first "@", exactly as create_user/create_db do when they create the
        # roles; the full "user@host" login is not a role name in PostgreSQL.
        substitutions = (
            ('{datastoredb}', datastorerw_dbp.db_name),
            ('{readuser}', datastorero_dbp.db_user.split("@")[0]),
            ('{writeuser}', datastorerw_dbp.db_user.split("@")[0]),
            ('{mainuser}', ckan_dbp.db_user.split("@")[0]),
            ('{maindb}', ckan_dbp.db_name),
        )
        for placeholder, value in substitutions:
            sql_script = sql_script.replace(placeholder, value)

        print("CKAN DB User:", ckan_dbp.db_user)

        # Execute the SQL script
        cur.execute(sql_script)

        # Commit the changes
        conn.commit()

        print("SQL script executed successfully.")
        print("CKAN DB User:", ckan_dbp.db_user)
        print("read/write DB User:", datastorerw_dbp.db_user)
        print("read/write DB name:", datastorerw_dbp.db_name)
        print("read/write host:", datastorerw_dbp.db_host)
        print("read DB user:", datastorero_dbp.db_user)
        print("read DB name:", datastorero_dbp.db_name)
    except psycopg2.Error as e:
        print(f"Error executing SQL script: {str(e)}")
    finally:
        # Close the cursor (if one was obtained) and the connection
        if cur is not None:
            cur.close()
        conn.close()
# The permissions template is looked up relative to the current working
# directory, which is printed to make a wrong location easy to spot.
set_permissions = './set_permissions.sql'
print("Current working directory:", os.getcwd())

if not os.path.isfile(set_permissions):
    print("File not found.")
else:
    print("File exists.")
    # Apply the datastore permissions using the three parsed connection URLs
    execute_sql_script(ckan_db, datastorero_db, datastorerw_db, set_permissions)

View File

@ -0,0 +1,108 @@
/*
This script configures the permissions for the datastore.
It ensures that the datastore read-only user will only be able to select from
the datastore database but has no create/write/edit permission or any
permissions on other databases. You must execute this script as a database
superuser on the PostgreSQL server that hosts your datastore database.
For example, if PostgreSQL is running locally and the "postgres" user has the
appropriate permissions (as in the default Ubuntu PostgreSQL install), you can
run:
ckan -c /etc/ckan/default/ckan.ini datastore set-permissions | sudo -u postgres psql
Or, if your PostgreSQL server is remote, you can pipe the permissions script
over SSH:
ckan -c /etc/ckan/default/ckan.ini datastore set-permissions | ssh dbserver sudo -u postgres psql
*/
-- Most of the following commands apply to an explicit database or to the whole
-- 'public' schema, and could be executed anywhere. But ALTER DEFAULT
-- PRIVILEGES applies to the current database, and so this script must be run
-- on a connection to the datastore DB. The psql meta-command below is only
-- understood by psql itself, so it stays commented out for callers that send
-- this text through a database driver:
--\connect {datastoredb}

-- Role placeholders are wrapped in double quotes so the role is matched by its
-- exact name (case, hyphens, ...), the same way the init script quotes role
-- names when it creates them.

-- Remove the implicit access every role has to the public schema, then hand
-- it back explicitly to the main and the datastore write roles.
REVOKE CREATE ON SCHEMA public FROM PUBLIC;
REVOKE USAGE ON SCHEMA public FROM PUBLIC;

GRANT CREATE ON SCHEMA public TO "{mainuser}";
GRANT USAGE ON SCHEMA public TO "{mainuser}";

GRANT CREATE ON SCHEMA public TO "{writeuser}";
GRANT USAGE ON SCHEMA public TO "{writeuser}";

-- The read-only role must not be able to connect to the main database ...
REVOKE CONNECT ON DATABASE {maindb} FROM "{readuser}";

-- ... but may connect to the datastore database and look into its schema.
GRANT CONNECT ON DATABASE {datastoredb} TO "{readuser}";
GRANT USAGE ON SCHEMA public TO "{readuser}";

-- Read access to the tables and views that exist right now
GRANT SELECT ON ALL TABLES IN SCHEMA public TO "{readuser}";

-- Read access, by default, to tables and views the write role creates later
ALTER DEFAULT PRIVILEGES FOR USER "{writeuser}" IN SCHEMA public
    GRANT SELECT ON TABLES TO "{readuser}";
-- A view listing every table and view of the public schema, used to look up
-- valid table (resource id) and view names.
--   _id      : first 16 hex characters of md5(name || alias target); substr
--              starts at position 0, so a length of 17 yields 16 characters
--   name     : relation name
--   oid      : relation oid
--   alias_of : name of a relation referenced by this relation's rewrite rule
--              (for a view: a relation it selects from); NULL when there is
--              no such reference
-- Role placeholders are double-quoted so the role is matched by its exact
-- name, the same way the init script quotes role names when creating them.
CREATE OR REPLACE VIEW "_table_metadata" AS
    SELECT DISTINCT
        substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id",
        dependee.relname AS name,
        dependee.oid AS oid,
        dependent.relname AS alias_of
    FROM
        pg_class AS dependee
        LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid
        LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid
        LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid
    WHERE
        -- drop the rule's dependency on its own relation
        (dependee.oid != dependent.oid OR dependent.oid IS NULL) AND
        -- is a table (from pg_tables view definition)
        -- or is a view (from pg_views view definition)
        (dependee.relkind = 'r'::"char" OR dependee.relkind = 'v'::"char")
        AND dependee.relnamespace = (
            SELECT oid FROM pg_namespace WHERE nspname='public')
    ORDER BY dependee.oid DESC;

ALTER VIEW "_table_metadata" OWNER TO "{writeuser}";
GRANT SELECT ON "_table_metadata" TO "{readuser}";
-- Trigger function that fills the _full_text column of a row.
-- A row arriving with a non-NULL _full_text is passed through untouched.
-- Otherwise _full_text is set to the tsvector of all column values of the row
-- joined by single spaces, leaving out columns whose name starts with an
-- underscore.
CREATE OR REPLACE FUNCTION populate_full_text_trigger() RETURNS trigger
AS $body$
    BEGIN
        IF NEW._full_text IS NOT NULL THEN
            RETURN NEW;
        END IF;
        NEW._full_text := (
            SELECT to_tsvector(string_agg(value, ' '))
            FROM json_each_text(row_to_json(NEW.*))
            -- backslash escapes the LIKE wildcard: keys beginning with "_"
            WHERE key NOT LIKE '\_%');
        RETURN NEW;
    END;
$body$ LANGUAGE plpgsql;

-- The role placeholder is double-quoted so the role is matched by its exact
-- name, the same way the init script quotes role names when creating them.
ALTER FUNCTION populate_full_text_trigger() OWNER TO "{writeuser}";
-- Migrate existing tables that don't have the full text trigger applied.
-- One CREATE TRIGGER statement is generated for every ordinary table
-- (relkind 'r') of the public schema that has no trigger named "zfulltext";
-- the statements are joined into a single string and run with EXECUTE.
-- string_agg returns NULL when no table qualifies, so coalesce substitutes a
-- harmless 'SELECT 1;' to give EXECUTE something to run.
-- NOTE(review): relname::regclass looks the table up by name, so this appears
-- to assume every pg_class name resolves through the search path -- confirm.
DO $body$
BEGIN
EXECUTE coalesce(
(SELECT string_agg(
'CREATE TRIGGER zfulltext BEFORE INSERT OR UPDATE ON ' ||
quote_ident(relname) || ' FOR EACH ROW EXECUTE PROCEDURE ' ||
'populate_full_text_trigger();', ' ')
FROM pg_class
LEFT OUTER JOIN pg_trigger AS t
ON t.tgrelid = relname::regclass AND t.tgname = 'zfulltext'
WHERE relkind = 'r'::"char" AND t.tgname IS NULL
AND relnamespace = (
SELECT oid FROM pg_namespace WHERE nspname='public')),
'SELECT 1;');
END;
$body$;

View File

@ -0,0 +1,2 @@
psycopg2-binary==2.9.3
sqlalchemy==1.3.5