diff --git a/.github/workflows/master_merge.yml b/.github/workflows/master_merge.yml index 37aaf2d..5d5d03e 100644 --- a/.github/workflows/master_merge.yml +++ b/.github/workflows/master_merge.yml @@ -246,3 +246,49 @@ jobs: ghcr.io/keitaroinc/datapusher:${{ steps.datapusher.outputs.IMAGE_TAG }} cache-from: type=local,src=/tmp/.buildx-cache-datapusher cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-datapusher + + build-psql-init: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} + + - name: Login to GitHub Container Registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.CR_PAT }} + + - name: Cache Docker layers + uses: actions/cache@v2 + with: + path: /tmp/.buildx-cache-psql-init + key: ${{ runner.os }}-buildx-psql-init-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx-psql-init + + - name: Get docker tag for psql-init image + id: psql-init + run: | + echo "::set-output name=IMAGE_TAG::$(awk -F '=' '/IMAGE_TAG/{print $2}' ./images/psql-init/Dockerfile)" + + - name: Build and push psql-init + uses: docker/build-push-action@v2 + with: + context: ./images/psql-init + file: ./images/psql-init/Dockerfile + push: true + tags: | + keitaro/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }} + ghcr.io/keitaroinc/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }} + cache-from: type=local,src=/tmp/.buildx-cache-psql-init + cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-psql-init diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index 4619032..c37d6e9 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -182,3 +182,34 @@ jobs: tags: keitaro/ckandatapusher:${{ steps.datapusher.outputs.IMAGE_TAG }} cache-from: type=local,src=/tmp/.buildx-cache-datapusher cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-datapusher + + build-psql-init: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + + - name: Cache Docker layers + uses: actions/cache@v2 + with: + path: /tmp/.buildx-cache-psql-init + key: ${{ runner.os }}-buildx-psql-init-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx-psql-init + + - name: Get docker tag for psql-init image + id: psql-init + run: | + echo "::set-output name=IMAGE_TAG::$(awk -F '=' '/IMAGE_TAG/{print $2}' ./images/psql-init/Dockerfile)" + + - name: Build psql-init + uses: docker/build-push-action@v2 + with: + context: ./images/psql-init + file: ./images/psql-init/Dockerfile + push: false + tags: keitaro/psql-init:${{ steps.psql-init.outputs.IMAGE_TAG }} + cache-from: type=local,src=/tmp/.buildx-cache-psql-init + cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-psql-init \ No newline at end of file diff --git a/images/psql-init/Dockerfile b/images/psql-init/Dockerfile new file mode 100644 index 0000000..65fdf5e --- /dev/null +++ b/images/psql-init/Dockerfile @@ -0,0 +1,21 @@ + +# Start with a lightweight base image +FROM python:3.9-alpine + +# Used by Github Actions to tag the image with +ENV IMAGE_TAG=0.0.1 + +# Set the working directory in the container +WORKDIR /srv + +# Copy the requirements file to the container +COPY requirements.txt . + +# Install the Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Copy the rest of the application code to the container +COPY psql-init/ . + +CMD ["python", "/srv/psql-init.py"] + diff --git a/images/psql-init/psql-init/psql-init.py b/images/psql-init/psql-init/psql-init.py new file mode 100644 index 0000000..6eb25cb --- /dev/null +++ b/images/psql-init/psql-init/psql-init.py @@ -0,0 +1,277 @@ +""" +Copyright (c) 2020 Keitaro AB + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os +import sys +import subprocess +import re +import psycopg2 +from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT +from psycopg2.extensions import AsIs +from sqlalchemy.engine.url import make_url + + +ckan_conn_str = os.environ.get('CKAN_SQLALCHEMY_URL', '') +datastorerw_conn_str = os.environ.get('CKAN_DATASTORE_WRITE_URL', '') +datastorero_conn_str = os.environ.get('CKAN_DATASTORE_READ_URL', '') + +master_user = os.environ.get('PSQL_MASTER', '') +master_passwd = os.environ.get('PSQL_PASSWD', '') +master_database = os.environ.get('PSQL_DB', '') + + +class DB_Params: + def __init__(self, conn_str): + self.db_user = make_url(conn_str).username + self.db_passwd = make_url(conn_str).password + self.db_host = make_url(conn_str).host + self.db_name = make_url(conn_str).database + + +def check_db_connection(db_params, retry=None): + + print('Checking whether database is up...') + + if retry is None: + retry = 20 + elif retry == 0: + print('Giving up...') + sys.exit(1) + + try: + con = psycopg2.connect(user=master_user, + host=db_params.db_host, + password=master_passwd, + database=master_database) + + except psycopg2.Error as e: + print((str(e))) + print('Unable to connect to the database...try again in a while.') + import time + time.sleep(30) + check_db_connection(db_params, retry=retry - 1) + else: + con.close() + + +def create_user(db_params): + con = None + try: + con = psycopg2.connect(user=master_user, + host=db_params.db_host, + password=master_passwd, + database=master_database) + con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = con.cursor() + print("Creating user " + db_params.db_user.split("@")[0]) + cur.execute('CREATE ROLE "%s" ' + + 'WITH ' + + 'LOGIN NOSUPERUSER INHERIT ' + + 'CREATEDB NOCREATEROLE NOREPLICATION ' + + 'PASSWORD %s', + (AsIs(db_params.db_user.split("@")[0]), + db_params.db_passwd,)) + except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + finally: + cur.close() + con.close() + + +def create_db(db_params): + con = None + try: + con = psycopg2.connect(user=master_user, + host=db_params.db_host, + password=master_passwd, + database=master_database) + con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = con.cursor() + cur.execute('GRANT "' + db_params.db_user.split("@") + [0] + '" TO "' + master_user.split("@")[0] + '"') + print("Creating database " + db_params.db_name + " with owner " + + db_params.db_user.split("@")[0]) + cur.execute('CREATE DATABASE ' + db_params.db_name + ' OWNER "' + + db_params.db_user.split("@")[0] + '"') + cur.execute('GRANT ALL PRIVILEGES ON DATABASE ' + + db_params.db_name + ' TO "' + + db_params.db_user.split("@")[0] + '"') + if is_pg_buffercache_enabled(db_params) >= 1: + # FIXME: This is a known issue with pg_buffercache access + # For more info check this thread: + # https://www.postgresql.org/message-id/21009351582737086%40iva6-22e79380f52c.qloud-c.yandex.net + print("Granting privileges on pg_monitor to " + + db_params.db_user.split("@")[0]) + cur.execute('GRANT "pg_monitor" TO "' + db_params.db_user.split("@")[0] + '"') + except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + finally: + cur.close() + con.close() + + +def is_pg_buffercache_enabled(db_params): + con = None + result = None + try: + con = psycopg2.connect(user=master_user, + host=db_params.db_host, + password=master_passwd, + database=db_params.db_name) + con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = con.cursor() + cur.execute("SELECT count(*) FROM pg_extension " + + "WHERE extname = 'pg_buffercache'") + result = cur.fetchone() + except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + finally: + cur.close() + con.close() + return result[0] + + +def set_datastore_permissions(datastore_rw_params, datastore_ro_params, sql): + con = None + try: + con = psycopg2.connect(user=master_user, + host=datastore_rw_params.db_host, + password=master_passwd, + database=datastore_rw_params.db_name) + con.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT) + cur = con.cursor() + cur.execute('GRANT CONNECT ON DATABASE ' + + datastore_rw_params.db_name + + ' TO ' + datastore_ro_params.db_user.split("@")[0]) + if is_pg_buffercache_enabled(datastore_rw_params) >= 1: + print("Granting privileges on pg_monitor to " + + datastore_ro_params.db_user.split("@")[0]) + cur.execute('GRANT ALL PRIVILEGES ON TABLE pg_monitor TO ' + + datastore_ro_params.db_user.split("@")[0]) + print("Setting datastore permissions\n") + print(sql) + cur.execute(sql) + print("Datastore permissions applied.") + except Exception as error: + print("ERROR DB: ", error) + finally: + cur.close() + con.close() + + +if master_user == '' or master_passwd == '' or master_database == '': + print("No master postgresql user provided.") + print("Cannot initialize default CKAN db resources. Exiting!") + sys.exit(1) + +print("Master DB: " + master_database + " Master User: " + master_user) + +ckan_db = DB_Params(ckan_conn_str) +datastorerw_db = DB_Params(datastorerw_conn_str) +datastorero_db = DB_Params(datastorero_conn_str) + + +# Check to see whether we can connect to the database, exit after 10 mins +check_db_connection(ckan_db) + +try: + create_user(ckan_db) +except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + +try: + create_user(datastorerw_db) +except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + +try: + create_user(datastorero_db) +except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + +try: + create_db(ckan_db) +except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + +try: + create_db(datastorerw_db) +except(Exception, psycopg2.DatabaseError) as error: + print("ERROR DB: ", error) + + +def execute_sql_script(ckan_dbp, datastorero_dbp, datastorerw_dbp, script_path): + # Connect to the database + conn = psycopg2.connect( + user=master_user, + host=datastorerw_dbp.db_host, + password=master_passwd, + database=datastorerw_dbp.db_name + ) + + try: + # Create a cursor + cur = conn.cursor() + + # Execute the SQL script + with open(script_path, 'r') as f: + sql_script = f.read() + + # Replace placeholders with actual values + + sql_script = sql_script.replace('{datastoredb}', datastorerw_dbp.db_name) + sql_script = sql_script.replace('{readuser}', datastorero_dbp.db_user) + sql_script = sql_script.replace('{writeuser}', datastorerw_dbp.db_user) + sql_script = sql_script.replace('{mainuser}', ckan_dbp.db_user) + sql_script = sql_script.replace('{maindb}', ckan_dbp.db_name) + + print("CKAN DB User:", ckan_dbp.db_user) + + # Execute the SQL script + cur.execute(sql_script) + + # Commit the changes + conn.commit() + + print("SQL script executed successfully.") + + print("CKAN DB User:", ckan_dbp.db_user) + print("read/write DB User:", datastorerw_dbp.db_user) + print("read/write DB name:", datastorerw_dbp.db_name) + print("read/write host:", datastorerw_dbp.db_host) + print("read DB user:", datastorero_dbp.db_user) + print("read DB name:", datastorero_dbp.db_name) + + except psycopg2.Error as e: + print(f"Error executing SQL script: {str(e)}") + + finally: + # Close the cursor and the connection + cur.close() + conn.close() + +set_permissions = './set_permissions.sql' + +# Print the current working directory +print("Current working directory:", os.getcwd()) + +# Check if the file exists +if os.path.isfile(set_permissions): + print("File exists.") + # Call the execute_sql_script function with the appropriate arguments + execute_sql_script(ckan_db, datastorero_db, datastorerw_db, set_permissions) +else: + print("File not found.") diff --git a/images/psql-init/psql-init/set_permissions.sql b/images/psql-init/psql-init/set_permissions.sql new file mode 100644 index 0000000..e7be428 --- /dev/null +++ b/images/psql-init/psql-init/set_permissions.sql @@ -0,0 +1,108 @@ +/* +This script configures the permissions for the datastore. + +It ensures that the datastore read-only user will only be able to select from +the datastore database but has no create/write/edit permission or any +permissions on other databases. You must execute this script as a database +superuser on the PostgreSQL server that hosts your datastore database. + +For example, if PostgreSQL is running locally and the "postgres" user has the +appropriate permissions (as in the default Ubuntu PostgreSQL install), you can +run: + + ckan -c /etc/ckan/default/ckan.ini datastore set-permissions | sudo -u postgres psql + +Or, if your PostgreSQL server is remote, you can pipe the permissions script +over SSH: + + ckan -c /etc/ckan/default/ckan.ini datastore set-permissions | ssh dbserver sudo -u postgres psql + +*/ + +-- Most of the following commands apply to an explicit database or to the whole +-- 'public' schema, and could be executed anywhere. But ALTER DEFAULT +-- PERMISSIONS applies to the current database, and so we must be connected to +-- the datastore DB: +--\connect {datastoredb} + +-- revoke permissions for the read-only user +REVOKE CREATE ON SCHEMA public FROM PUBLIC; +REVOKE USAGE ON SCHEMA public FROM PUBLIC; + +GRANT CREATE ON SCHEMA public TO {mainuser}; +GRANT USAGE ON SCHEMA public TO {mainuser}; + +GRANT CREATE ON SCHEMA public TO {writeuser}; +GRANT USAGE ON SCHEMA public TO {writeuser}; + +-- take connect permissions from main db +REVOKE CONNECT ON DATABASE {maindb} FROM {readuser}; + +-- grant select permissions for read-only user +GRANT CONNECT ON DATABASE {datastoredb} TO {readuser}; +GRANT USAGE ON SCHEMA public TO {readuser}; + +-- grant access to current tables and views to read-only user +GRANT SELECT ON ALL TABLES IN SCHEMA public TO {readuser}; + +-- grant access to new tables and views by default +ALTER DEFAULT PRIVILEGES FOR USER {writeuser} IN SCHEMA public + GRANT SELECT ON TABLES TO {readuser}; + +-- a view for listing valid table (resource id) and view names +CREATE OR REPLACE VIEW "_table_metadata" AS + SELECT DISTINCT + substr(md5(dependee.relname || COALESCE(dependent.relname, '')), 0, 17) AS "_id", + dependee.relname AS name, + dependee.oid AS oid, + dependent.relname AS alias_of + FROM + pg_class AS dependee + LEFT OUTER JOIN pg_rewrite AS r ON r.ev_class = dependee.oid + LEFT OUTER JOIN pg_depend AS d ON d.objid = r.oid + LEFT OUTER JOIN pg_class AS dependent ON d.refobjid = dependent.oid + WHERE + (dependee.oid != dependent.oid OR dependent.oid IS NULL) AND + -- is a table (from pg_tables view definition) + -- or is a view (from pg_views view definition) + (dependee.relkind = 'r'::"char" OR dependee.relkind = 'v'::"char") + AND dependee.relnamespace = ( + SELECT oid FROM pg_namespace WHERE nspname='public') + ORDER BY dependee.oid DESC; +ALTER VIEW "_table_metadata" OWNER TO {writeuser}; +GRANT SELECT ON "_table_metadata" TO {readuser}; + +-- _full_text fields are now updated by a trigger when set to NULL +CREATE OR REPLACE FUNCTION populate_full_text_trigger() RETURNS trigger +AS $body$ + BEGIN + IF NEW._full_text IS NOT NULL THEN + RETURN NEW; + END IF; + NEW._full_text := ( + SELECT to_tsvector(string_agg(value, ' ')) + FROM json_each_text(row_to_json(NEW.*)) + WHERE key NOT LIKE '\_%'); + RETURN NEW; + END; +$body$ LANGUAGE plpgsql; +ALTER FUNCTION populate_full_text_trigger() OWNER TO {writeuser}; + +-- migrate existing tables that don't have full text trigger applied +DO $body$ + BEGIN + EXECUTE coalesce( + (SELECT string_agg( + 'CREATE TRIGGER zfulltext BEFORE INSERT OR UPDATE ON ' || + quote_ident(relname) || ' FOR EACH ROW EXECUTE PROCEDURE ' || + 'populate_full_text_trigger();', ' ') + FROM pg_class + LEFT OUTER JOIN pg_trigger AS t + ON t.tgrelid = relname::regclass AND t.tgname = 'zfulltext' + WHERE relkind = 'r'::"char" AND t.tgname IS NULL + AND relnamespace = ( + SELECT oid FROM pg_namespace WHERE nspname='public')), + 'SELECT 1;'); + END; +$body$; + diff --git a/images/psql-init/requirements.txt b/images/psql-init/requirements.txt new file mode 100644 index 0000000..0662697 --- /dev/null +++ b/images/psql-init/requirements.txt @@ -0,0 +1,2 @@ +psycopg2-binary==2.9.3 +sqlalchemy==1.3.5