Merge pull request #104 from keitaroinc/psql-init-Dockerfile

psql-init dockerfile and scripts added
This commit is contained in:
Filip.mihajlovski 2023-07-03 11:50:14 +02:00 committed by GitHub
commit 798c974ab1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 485 additions and 0 deletions

View File

@ -246,3 +246,49 @@ jobs:
ghcr.io/keitaroinc/datapusher:${{ steps.datapusher.outputs.IMAGE_TAG }}
cache-from: type=local,src=/tmp/.buildx-cache-datapusher
cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-datapusher
build-psql-init:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.CR_PAT }}
- name: Cache Docker layers
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache-psql-init
key: ${{ runner.os }}-buildx-psql-init-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-psql-init
- name: Get docker tag for psql-init image
id: psql-init
run: |
echo "::set-output name=IMAGE_TAG::$(awk -F '=' '/IMAGE_TAG/{print $2}' ./images/psql-init/Dockerfile)"
- name: Build and push psql-init
uses: docker/build-push-action@v2
with:
context: ./images/psql-init
file: ./images/psql-init/Dockerfile
push: true
tags: |
keitaro/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }}
ghcr.io/keitaroinc/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }}
cache-from: type=local,src=/tmp/.buildx-cache-psql-init
cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-psql-init

View File

@ -182,3 +182,34 @@ jobs:
tags: keitaro/ckandatapusher:${{ steps.datapusher.outputs.IMAGE_TAG }}
cache-from: type=local,src=/tmp/.buildx-cache-datapusher
cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-datapusher
build-psql-init:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Cache Docker layers
uses: actions/cache@v2
with:
path: /tmp/.buildx-cache-psql-init
key: ${{ runner.os }}-buildx-psql-init-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-psql-init
- name: Get docker tag for psql-init image
id: psql-init
run: |
echo "::set-output name=IMAGE_TAG::$(awk -F '=' '/IMAGE_TAG/{print $2}' ./images/psql-init/Dockerfile)"
- name: Build psql-init
uses: docker/build-push-action@v2
with:
context: ./images/psql-init
file: ./images/psql-init/Dockerfile
push: false
tags: keitaro/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }}
cache-from: type=local,src=/tmp/.buildx-cache-psql-init
cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-psql-init

View File

@ -0,0 +1,21 @@
# Start with a lightweight base image
FROM python:3.9-alpine
# Used by Github Actions to tag the image with
ENV IMAGE_TAG=0.0.1
# Set the working directory in the container
WORKDIR /srv
# Copy the requirements file to the container
COPY requirements.txt .
# Install the Python dependencies
RUN pip install --no-cache-dir -r requirements.txt
# Copy the rest of the application code to the container
COPY psql-init/ .
CMD ["python", "/srv/psql-init.py"]

View File

@ -0,0 +1,277 @@
"""
Copyright (c) 2020 Keitaro AB
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import sys
import subprocess
import re
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from psycopg2.extensions import AsIs
from sqlalchemy.engine.url import make_url
ckan_conn_str = os.environ.get('CKAN_SQLALCHEMY_URL', '')
datastorerw_conn_str = os.environ.get('CKAN_DATASTORE_WRITE_URL', '')
datastorero_conn_str = os.environ.get('CKAN_DATASTORE_READ_URL', '')
master_user = os.environ.get('PSQL_MASTER', '')
master_passwd = os.environ.get('PSQL_PASSWD', '')
master_database = os.environ.get('PSQL_DB', '')
class DB_Params:
def __init__(self, conn_str):
self.db_user = make_url(conn_str).username
self.db_passwd = make_url(conn_str).password
self.db_host = make_url(conn_str).host
self.db_name = make_url(conn_str).database
def check_db_connection(db_params, retry=None):
print('Checking whether database is up...')
if retry is None:
retry = 20
elif retry == 0:
print('Giving up...')
sys.exit(1)
try:
con = psycopg2.connect(user=master_user,
host=db_params.db_host,
password=master_passwd,
database=master_database)
except psycopg2.Error as e:
print((str(e)))
print('Unable to connect to the database...try again in a while.')
import time
time.sleep(30)
check_db_connection(db_params, retry=retry - 1)
else:
con.close()
def create_user(db_params):
con = None
try:
con = psycopg2.connect(user=master_user,
host=db_params.db_host,
password=master_passwd,
database=master_database)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
print("Creating user " + db_params.db_user.split("@")[0])
cur.execute('CREATE ROLE "%s" ' +
'WITH ' +
'LOGIN NOSUPERUSER INHERIT ' +
'CREATEDB NOCREATEROLE NOREPLICATION ' +
'PASSWORD %s',
(AsIs(db_params.db_user.split("@")[0]),
db_params.db_passwd,))
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
finally:
cur.close()
con.close()
def create_db(db_params):
con = None
try:
con = psycopg2.connect(user=master_user,
host=db_params.db_host,
password=master_passwd,
database=master_database)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
cur.execute('GRANT "' + db_params.db_user.split("@")
[0] + '" TO "' + master_user.split("@")[0] + '"')
print("Creating database " + db_params.db_name + " with owner " +
db_params.db_user.split("@")[0])
cur.execute('CREATE DATABASE ' + db_params.db_name + ' OWNER "' +
db_params.db_user.split("@")[0] + '"')
cur.execute('GRANT ALL PRIVILEGES ON DATABASE ' +
db_params.db_name + ' TO "' +
db_params.db_user.split("@")[0] + '"')
if is_pg_buffercache_enabled(db_params) >= 1:
# FIXME: This is a known issue with pg_buffercache access
# For more info check this thread:
# https://www.postgresql.org/message-id/21009351582737086%40iva6-22e79380f52c.qloud-c.yandex.net
print("Granting privileges on pg_monitor to " +
db_params.db_user.split("@")[0])
cur.execute('GRANT "pg_monitor" TO "' + db_params.db_user.split("@")[0] + '"')
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
finally:
cur.close()
con.close()
def is_pg_buffercache_enabled(db_params):
con = None
result = None
try:
con = psycopg2.connect(user=master_user,
host=db_params.db_host,
password=master_passwd,
database=db_params.db_name)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
cur.execute("SELECT count(*) FROM pg_extension " +
"WHERE extname = 'pg_buffercache'")
result = cur.fetchone()
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
finally:
cur.close()
con.close()
return result[0]
def set_datastore_permissions(datastore_rw_params, datastore_ro_params, sql):
con = None
try:
con = psycopg2.connect(user=master_user,
host=datastore_rw_params.db_host,
password=master_passwd,
database=datastore_rw_params.db_name)
con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
cur = con.cursor()
cur.execute('GRANT CONNECT ON DATABASE ' +
datastore_rw_params.db_name +
' TO ' + datastore_ro_params.db_user.split("@")[0])
if is_pg_buffercache_enabled(datastore_rw_params) >= 1:
print("Granting privileges on pg_monitor to " +
datastore_ro_params.db_user.split("@")[0])
cur.execute('GRANT ALL PRIVILEGES ON TABLE pg_monitor TO ' +
datastore_ro_params.db_user.split("@")[0])
print("Setting datastore permissions\n")
print(sql)
cur.execute(sql)
print("Datastore permissions applied.")
except Exception as error:
print("ERROR DB: ", error)
finally:
cur.close()
con.close()
if master_user == '' or master_passwd == '' or master_database == '':
print("No master postgresql user provided.")
print("Cannot initialize default CKAN db resources. Exiting!")
sys.exit(1)
print("Master DB: " + master_database + " Master User: " + master_user)
ckan_db = DB_Params(ckan_conn_str)
datastorerw_db = DB_Params(datastorerw_conn_str)
datastorero_db = DB_Params(datastorero_conn_str)
# Check to see whether we can connect to the database, exit after 10 mins
check_db_connection(ckan_db)
try:
create_user(ckan_db)
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
try:
create_user(datastorerw_db)
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
try:
create_user(datastorero_db)
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
try:
create_db(ckan_db)
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
try:
create_db(datastorerw_db)
except(Exception, psycopg2.DatabaseError) as error:
print("ERROR DB: ", error)
def execute_sql_script(ckan_dbp, datastorero_dbp, datastorerw_dbp, script_path):
# Connect to the database
conn = psycopg2.connect(
user=master_user,
host=datastorerw_dbp.db_host,
password=master_passwd,
database=datastorerw_dbp.db_name
)
try:
# Create a cursor
cur = conn.cursor()
# Execute the SQL script
with open(script_path, 'r') as f:
sql_script = f.read()
# Replace placeholders with actual values
sql_script = sql_script.replace('{datastoredb}', datastorerw_dbp.db_name)
sql_script = sql_script.replace('{readuser}', datastorero_dbp.db_user)
sql_script = sql_script.replace('{writeuser}', datastorerw_dbp.db_user)
sql_script = sql_script.replace('{mainuser}', ckan_dbp.db_user)
sql_script = sql_script.replace('{maindb}', ckan_dbp.db_name)
print("CKAN DB User:", ckan_dbp.db_user)
# Execute the SQL script
cur.execute(sql_script)
# Commit the changes
conn.commit()
print("SQL script executed successfully.")
print("CKAN DB User:", ckan_dbp.db_user)
print("read/write DB User:", datastorerw_dbp.db_user)
print("read/write DB name:", datastorerw_dbp.db_name)
print("read/write host:", datastorerw_dbp.db_host)
print("read DB user:", datastorero_dbp.db_user)
print("read DB name:", datastorero_dbp.db_name)
except psycopg2.Error as e:
print(f"Error executing SQL script: {str(e)}")
finally:
# Close the cursor and the connection
cur.close()
conn.close()
set_permissions = './set_permissions.sql'
# Print the current working directory
print("Current working directory:", os.getcwd())
# Check if the file exists
if os.path.isfile(set_permissions):
print("File exists.")
# Call the execute_sql_script function with the appropriate arguments
execute_sql_script(ckan_db, datastorero_db, datastorerw_db, set_permissions)
else:
print("File not found.")

View File

@ -0,0 +1,108 @@
/*
This script configures the permissions for the datastore.
It ensures that the datastore read-only user will only be able to select from
the datastore database but has no create/write/edit permission or any
permissions on other databases. You must execute this script as a database
superuser on the PostgreSQL server that hosts your datastore database.
For example, if PostgreSQL is running locally and the "postgres" user has the
appropriate permissions (as in the default Ubuntu PostgreSQL install), you can
run:
ckan -c /etc/ckan/default/ckan.ini datastore set-permissions | sudo -u postgres psql
Or, if your PostgreSQL server is remote, you can pipe the permissions script
over SSH:
ckan -c /etc/ckan/default/ckan.ini datastore set-permissions | ssh dbserver sudo -u postgres psql
*/
-- Most of the following commands apply to an explicit database or to the whole
-- 'public' schema, and could be executed anywhere. But ALTER DEFAULT
-- PERMISSIONS applies to the current database, and so we must be connected to
-- the datastore DB:
--\connect {datastoredb}
-- revoke permissions for the read-only user
REVOKE CREATE ON SCHEMA public FROM PUBLIC;
REVOKE USAGE ON SCHEMA public FROM PUBLIC;
GRANT CREATE ON SCHEMA public TO {mainuser};
GRANT USAGE ON SCHEMA public TO {mainuser};
GRANT CREATE ON SCHEMA public TO {writeuser};
GRANT USAGE ON SCHEMA public TO {writeuser};
-- take connect permissions from main db
REVOKE CONNECT ON DATABASE {maindb} FROM {readuser};
-- grant select permissions for read-only user
GRANT CONNECT ON DATABASE {datastoredb} TO {readuser};
GRANT USAGE ON SCHEMA public TO {readuser};
-- grant access to current tables and views to read-only user
GRANT SELECT ON ALL TABLES IN SCHEMA public TO {readuser};
-- grant access to new tables and views by default
ALTER DEFAULT PRIVILEGES FOR USER {writeuser} IN SCHEMA public
GRANT SELECT ON TABLES TO {readuser};
-- a view for listing valid table (resource id) and view names
CREATE OR REPLACE VIEW "_table_metadata" AS
SELECT DISTINCT
substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id",
dependee.relname AS name,
dependee.oid AS oid,
dependent.relname AS alias_of
FROM
pg_class AS dependee
LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid
LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid
LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid
WHERE
(dependee.oid != dependent.oid OR dependent.oid IS NULL) AND
-- is a table (from pg_tables view definition)
-- or is a view (from pg_views view definition)
(dependee.relkind = 'r'::"char" OR dependee.relkind = 'v'::"char")
AND dependee.relnamespace = (
SELECT oid FROM pg_namespace WHERE nspname='public')
ORDER BY dependee.oid DESC;
ALTER VIEW "_table_metadata" OWNER TO {writeuser};
GRANT SELECT ON "_table_metadata" TO {readuser};
-- _full_text fields are now updated by a trigger when set to NULL
CREATE OR REPLACE FUNCTION populate_full_text_trigger() RETURNS trigger
AS $body$
BEGIN
IF NEW._full_text IS NOT NULL THEN
RETURN NEW;
END IF;
NEW._full_text := (
SELECT to_tsvector(string_agg(value, ' '))
FROM json_each_text(row_to_json(NEW.*))
WHERE key NOT LIKE '\_%');
RETURN NEW;
END;
$body$ LANGUAGE plpgsql;
ALTER FUNCTION populate_full_text_trigger() OWNER TO {writeuser};
-- migrate existing tables that don't have full text trigger applied
DO $body$
BEGIN
EXECUTE coalesce(
(SELECT string_agg(
'CREATE TRIGGER zfulltext BEFORE INSERT OR UPDATE ON ' ||
quote_ident(relname) || ' FOR EACH ROW EXECUTE PROCEDURE ' ||
'populate_full_text_trigger();', ' ')
FROM pg_class
LEFT OUTER JOIN pg_trigger AS t
ON t.tgrelid = relname::regclass AND t.tgname = 'zfulltext'
WHERE relkind = 'r'::"char" AND t.tgname IS NULL
AND relnamespace = (
SELECT oid FROM pg_namespace WHERE nspname='public')),
'SELECT 1;');
END;
$body$;

View File

@ -0,0 +1,2 @@
psycopg2-binary==2.9.3
sqlalchemy==1.3.5