"""Maintain the ``false-robots.txt`` entry stored in ``group_.typesettings``.

The script can list virtual hosts, list or save the current robots values,
show the differences against a template, print the UPDATE statements that
would apply the template (dry run, the default) or execute them.
"""
import argparse
import logging
import os
import re
import sys
from urllib.parse import urlsplit

# Default values
DEFAULT_CONFIG_FILE = "/home/life/Portal-Bundle/portal-setup-wizard.properties"
DEFAULT_TEMPLATE_FILE = "template-robots.txt"
DEFAULT_PLACEHOLDER = "{{HOSTNAME}}"
DEFAULT_OUTPUT_DIR = "currents_robots"
LOG_FILE = "update_robots.log"

# Port used when the JDBC URL does not specify one (PostgreSQL default).
DEFAULT_DB_PORT = 5432

# Token that stands in for line breaks inside a typesettings value.
SAFE_NEWLINE = "_SAFE_NEWLINE_CHARACTER_"

# Matches the false-robots.txt entry inside a typesettings string; group 1 is
# the value, taken to run up to the next '¶' (or to the end of the string).
# NOTE(review): this assumes '¶' is the separator actually stored in
# group_.typesettings - confirm against the database. If properties are
# newline-separated, the match extends over every following property.
ROBOTS_VALUE_RE = re.compile(r'false-robots\.txt=([^¶]*)')

# The LIKE pattern is sent as a bound parameter: psycopg2 treats '%' in the
# query text as a format marker as soon as any parameter is supplied, so a
# literal '%false-robots.txt%' breaks every parameterised execution.
ROBOTS_LIKE_PATTERN = "%false-robots.txt%"
ROBOTS_ROWS_QUERY = """
    SELECT g.groupid, g.typesettings, v.hostname
    FROM public.group_ g
    JOIN public.layoutset l ON g.groupid = l.groupid
    JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
    WHERE g.typesettings LIKE %s
"""

UPDATE_QUERY = "UPDATE public.group_ SET typesettings = %s WHERE groupid = %s;"


def read_db_config(config_file_path):
    """Read ``key=value`` pairs from a properties-style configuration file.

    Blank lines, lines without ``=`` and comment lines (starting with ``#``
    or ``!`` after optional leading whitespace) are ignored. Keys and values
    are stripped of surrounding whitespace.

    Returns:
        dict mapping each key to its value (split on the first ``=``).
    """
    config = {}
    with open(config_file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line or line.startswith(('#', '!')) or '=' not in line:
                continue
            key, _, value = line.partition('=')
            config[key.strip()] = value.strip()
    return config


def _encode_newlines(text):
    """Replace every line break in *text* with the safe-newline token."""
    # '\r\n' must be handled first, otherwise its '\n' is consumed on its own
    # and a stray '\r' is left in the value.
    return (
        text.replace('\r\n', SAFE_NEWLINE)
        .replace('\r', SAFE_NEWLINE)
        .replace('\n', SAFE_NEWLINE)
    )


def read_template_file(file_path):
    """Return the stripped template content with line breaks encoded."""
    with open(file_path, 'r', encoding='utf-8') as file:
        return _encode_newlines(file.read().strip())


def _extract_robots_value(typesettings):
    """Return the current false-robots.txt value, or None when absent."""
    match = ROBOTS_VALUE_RE.search(typesettings)
    return match.group(1) if match else None


def _replace_robots_value(typesettings, new_value):
    """Return *typesettings* with the false-robots.txt value set to *new_value*."""
    # A callable replacement inserts new_value verbatim; a plain replacement
    # string would have its backslashes parsed as escapes / group references.
    return ROBOTS_VALUE_RE.sub(
        lambda _match: f'false-robots.txt={new_value}', typesettings
    )


def _fetch_robots_rows(cursor, specific_hostname=None):
    """Return ``(groupid, typesettings, hostname)`` rows that hold a robots entry.

    When *specific_hostname* is given, only rows for that virtual host are
    returned.
    """
    query = ROBOTS_ROWS_QUERY
    params = [ROBOTS_LIKE_PATTERN]
    if specific_hostname:
        query += " AND v.hostname = %s"
        params.append(specific_hostname)
    cursor.execute(query, params)
    return cursor.fetchall()


def update_typesettings(cursor, template_content, placeholder_pattern,
                        execute_updates, specific_hostname=None):
    """Apply the template to the false-robots.txt value of every matching group.

    Args:
        cursor: open database cursor.
        template_content: template text with line breaks already encoded.
        placeholder_pattern: literal text replaced by each row's hostname.
        execute_updates: when true, run the UPDATE statements and log each
            statement together with the statement that restores the previous
            value; when false, only print the statements.
        specific_hostname: optional hostname restricting the rows processed.

    Returns:
        True when every row was processed, False when an error occurred (the
        error is printed and logged). The caller owns commit/rollback.
    """
    try:
        rows = _fetch_robots_rows(cursor, specific_hostname)
        for groupid, current_typesettings, hostname in rows:
            # Replace the placeholder in the template with the hostname value
            new_false_robots = template_content.replace(placeholder_pattern, hostname)
            updated_typesettings = _replace_robots_value(
                current_typesettings, new_false_robots
            )
            executed_query = cursor.mogrify(
                UPDATE_QUERY, (updated_typesettings, groupid)
            ).decode('utf-8')

            if not execute_updates:
                print(executed_query)
                continue

            restore_query = cursor.mogrify(
                UPDATE_QUERY, (current_typesettings, groupid)
            ).decode('utf-8')
            cursor.execute(UPDATE_QUERY, (updated_typesettings, groupid))
            logging.info("Executed: \n%s\n", executed_query)
            logging.info("### Restore command: \n%s\n", restore_query)
    except Exception as e:
        print(f"Error during update: {e}")
        logging.error(f"Error during update: {e}")
        return False
    return True


def print_current_vhosts(cursor):
    """Print the hostname of every row in ``public.virtualhost``."""
    try:
        cursor.execute("""
            SELECT v.hostname
            FROM public.virtualhost v
        """)
        print("List of current vhosts:")
        for (hostname,) in cursor.fetchall():
            print(hostname)
    except Exception as e:
        print(f"Error retrieving vhosts: {e}")


def print_current_robots(cursor):
    """Print each hostname together with its current false-robots.txt value."""
    try:
        rows = _fetch_robots_rows(cursor)
        print("List of current false-robots.txt values and related hostnames:")
        for _groupid, typesettings, hostname in rows:
            robots = _extract_robots_value(typesettings)
            if robots is not None:
                print(f"Hostname: {hostname} - Robots: {robots}")
    except Exception as e:
        print(f"Error retrieving false-robots.txt values: {e}")


def save_current_vhosts(cursor, output_dir):
    """Write each host's robots value and full typesettings into *output_dir*.

    Two files are written per hostname: ``robots_<hostname>.txt`` (left empty
    when no value can be extracted) and ``typesettings_<hostname>.txt``.
    The directory is created when missing.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
        for _groupid, typesettings, hostname in _fetch_robots_rows(cursor):
            file_path = os.path.join(output_dir, f"robots_{hostname}.txt")
            robots = _extract_robots_value(typesettings)
            with open(file_path, 'w', encoding='utf-8') as file:
                if robots is not None:
                    file.write(robots)
            print(f"Saved {hostname} robots.txt to {file_path}")

            file_path = os.path.join(output_dir, f"typesettings_{hostname}.txt")
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(typesettings)
            print(f"Saved {hostname} typesettings to {file_path}")
    except Exception as e:
        print(f"Error saving vhosts: {e}")


def print_differences(cursor, template_content, placeholder_pattern, specific_hostname=None):
    """Print every host whose current robots value differs from the template."""
    try:
        rows = _fetch_robots_rows(cursor, specific_hostname)
        print("Differences between current and new false-robots.txt values:")
        for _groupid, current_typesettings, hostname in rows:
            current_false_robots = _extract_robots_value(current_typesettings)
            if current_false_robots is None:
                continue
            new_false_robots = template_content.replace(placeholder_pattern, hostname)
            if current_false_robots != new_false_robots:
                print(f"Hostname: {hostname}")
                print(f"Current: {current_false_robots}")
                print(f"New: {new_false_robots}")
                print("-" * 40)
    except Exception as e:
        print(f"Error during difference check: {e}")


def _parse_jdbc_url(jdbc_url):
    """Split ``jdbc:<driver>://host[:port]/dbname[?options]`` into its parts.

    Returns:
        ``(host, port, dbname)``; the port falls back to DEFAULT_DB_PORT when
        the URL has none, and any ``?options`` suffix is dropped.

    Raises:
        ValueError: when the host or database name cannot be determined, or
            the port is not numeric.
    """
    url = jdbc_url.strip()
    if url.startswith("jdbc:"):
        url = url[len("jdbc:"):]
    parts = urlsplit(url)
    dbname = parts.path.lstrip('/')
    if not parts.hostname or not dbname:
        raise ValueError(f"Cannot parse JDBC URL: {jdbc_url!r}")
    return parts.hostname, parts.port or DEFAULT_DB_PORT, dbname


def _resolve_db_params(args):
    """Build the connection keyword arguments from CLI options and config file.

    Command-line values take precedence. The configuration file is read only
    when at least one connection parameter was not given on the command line.
    """
    params = {
        "host": args.db_host,
        "port": args.db_port,
        "dbname": args.db_name,
        "user": args.db_user,
        "password": args.db_password,
    }
    if not args.config_file or all(v is not None for v in params.values()):
        return params

    config = read_db_config(args.config_file)
    from_file = {
        "user": config.get('jdbc.default.username'),
        "password": config.get('jdbc.default.password'),
    }
    if any(params[key] is None for key in ("host", "port", "dbname")):
        if 'jdbc.default.url' not in config:
            raise ValueError(
                f"jdbc.default.url not found in {args.config_file}"
            )
        host, port, dbname = _parse_jdbc_url(config['jdbc.default.url'])
        from_file.update(host=host, port=port, dbname=dbname)

    for key, value in from_file.items():
        if params[key] is None:
            params[key] = value
    return params


def _build_parser():
    """Create the command-line parser."""
    parser = argparse.ArgumentParser(
        description="Update the false-robots.txt field in the group_ table.",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=f"""
Examples of usage:

1. Use a configuration file for database parameters and print the update queries:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE}

2. Specify database parameters directly from the command line and print the update queries:
   python3 update_robots.py --db-host postgres --db-port 5432 --db-name liferay_db --db-user infra_bundle_dev --db-password pass_db --template-file {DEFAULT_TEMPLATE_FILE}

3. Execute the update queries:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --execute

4. Print the list of current vhosts:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --list-vhosts

5. Print the list of current false-robots.txt values and related hostnames:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --list-robots

6. Save the current vhosts to files:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --save-vhosts --output-dir {DEFAULT_OUTPUT_DIR}

7. Update the false-robots.txt for a specific hostname:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --hostname specific.hostname.com

8. Print the differences between current and new false-robots.txt values:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --print-differences
""")
    parser.add_argument("--config-file", default=DEFAULT_CONFIG_FILE, help=f"Path to the configuration file with database parameters (default: {DEFAULT_CONFIG_FILE})")
    parser.add_argument("--db-host", help="Database host")
    parser.add_argument("--db-port", type=int, help="Database port")
    parser.add_argument("--db-name", help="Database name")
    parser.add_argument("--db-user", help="Database user")
    parser.add_argument("--db-password", help="Database password")
    parser.add_argument("--template-file", default=DEFAULT_TEMPLATE_FILE, help=f"Path to the template file for false-robots.txt (default: {DEFAULT_TEMPLATE_FILE})")
    parser.add_argument("--placeholder", default=DEFAULT_PLACEHOLDER, help=f"Placeholder pattern to replace in the template file (default: {DEFAULT_PLACEHOLDER})")
    parser.add_argument("--execute", action="store_true", help="Execute the update queries (default is to print the queries)")
    parser.add_argument("--print-updates", action="store_true", help="Print only (not execute) the update queries (default is to print the queries)")
    parser.add_argument("--list-vhosts", action="store_true", help="Print the list of current vhosts")
    parser.add_argument("--list-robots", action="store_true", help="Print the list of current false-robots.txt values and related hostnames")
    parser.add_argument("--save-vhosts", action="store_true", help=f"Save the current vhosts to files (default directory: {DEFAULT_OUTPUT_DIR})")
    parser.add_argument("--output-dir", default=DEFAULT_OUTPUT_DIR, help=f"Output directory to save the vhosts (default: {DEFAULT_OUTPUT_DIR})")
    parser.add_argument("--hostname", help="Specific hostname to update")
    parser.add_argument("--print-differences", action="store_true", help="Print the differences between current and new false-robots.txt values")
    return parser


def main(argv=None):
    """Run the command-line tool.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Process exit code: 0 on success, 1 when an update run failed.
    """
    raw_args = sys.argv[1:] if argv is None else list(argv)
    parser = _build_parser()
    args = parser.parse_args(raw_args)

    # Every option has either a default or is optional, so "nothing requested"
    # can only be detected from the raw argument list.
    if not raw_args:
        parser.print_help()
        return 0

    # Only the diff/update actions use the template; listing and saving must
    # not fail because the template file is missing.
    template_content = None
    if not (args.list_vhosts or args.list_robots or args.save_vhosts):
        try:
            template_content = read_template_file(args.template_file)
        except OSError as exc:
            parser.error(f"Cannot read template file {args.template_file}: {exc}")

    try:
        db_params = _resolve_db_params(args)
    except (OSError, ValueError) as exc:
        parser.error(f"Cannot determine database parameters: {exc}")

    # Configure logging
    logging.basicConfig(filename=LOG_FILE, level=logging.INFO, format='%(asctime)s %(message)s')

    # Imported here so that --help and the text helpers work on machines
    # where the database driver is not installed.
    import psycopg2

    exit_code = 0
    conn = psycopg2.connect(**db_params)
    try:
        cursor = conn.cursor()
        try:
            if args.list_vhosts:
                print_current_vhosts(cursor)
            elif args.list_robots:
                print_current_robots(cursor)
            elif args.save_vhosts:
                save_current_vhosts(cursor, args.output_dir)
            elif args.print_differences:
                print_differences(cursor, template_content, args.placeholder, args.hostname)
            elif args.execute:
                if update_typesettings(cursor, template_content, args.placeholder, True, args.hostname):
                    conn.commit()
                    print("robots.txt updated. Clean the Liferay database cache on all the instances (Configuration => Server Administration => Clear the database cache)")
                else:
                    # Never commit a partially applied run.
                    conn.rollback()
                    print("Update failed; no changes were committed.")
                    exit_code = 1
            else:
                # --print-updates, or no action flag: dry run is the
                # documented default.
                if not update_typesettings(cursor, template_content, args.placeholder, False, args.hostname):
                    exit_code = 1
        finally:
            cursor.close()
    finally:
        conn.close()
    return exit_code


if __name__ == "__main__":
    sys.exit(main())