liferay_scripts/update_robots/update_robots.py

296 lines
13 KiB
Python

import psycopg2
import re
import argparse
import os
import logging
# Default values
# Properties file read for the jdbc.default.* connection settings (default of --config-file).
DEFAULT_CONFIG_FILE = "/home/life/Portal-Bundle/portal-setup-wizard.properties"
# Template whose content becomes the new false-robots.txt value (default of --template-file).
DEFAULT_TEMPLATE_FILE = "template-robots.txt"
# Token in the template that is replaced by each virtual host name (default of --placeholder).
DEFAULT_PLACEHOLDER = "{{HOSTNAME}}"
# Directory that --save-vhosts writes its files into (default of --output-dir).
DEFAULT_OUTPUT_DIR = "currents_robots"
# File passed to logging.basicConfig() in main(); executed and restore statements are logged there.
LOG_FILE = "update_robots.log"
# Function to read database connection parameters from the configuration file
def read_db_config(config_file_path):
    """Parse a ``key=value`` properties file into a dictionary.

    Blank lines, comment lines (starting with ``#`` or ``!`` after optional
    leading whitespace) and lines without an ``=`` are skipped. Each remaining
    line is split on its first ``=``; key and value are stripped of
    surrounding whitespace. A key that appears several times keeps its last
    value.

    Args:
        config_file_path: Path of the properties file to read.

    Returns:
        dict: Mapping of property name to property value (both ``str``).

    Raises:
        OSError: If the file cannot be opened.
    """
    config = {}
    with open(config_file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            # Strip before testing for a comment marker so that indented
            # comments are not mistaken for "key=value" entries.
            if not line or line.startswith(("#", "!")):
                continue
            if '=' not in line:
                continue
            key, value = line.split('=', 1)
            config[key.strip()] = value.strip()
    return config
# Function to read the content of the template file
def read_template_file(file_path):
    """Load the robots template and flatten it onto a single line.

    The file content is stripped of leading/trailing whitespace and every
    line break is replaced by the literal marker ``_SAFE_NEWLINE_CHARACTER_``.

    Args:
        file_path: Path of the template file.

    Returns:
        str: The template content with line breaks replaced by the marker.

    Raises:
        OSError: If the file cannot be opened.
    """
    newline_marker = '_SAFE_NEWLINE_CHARACTER_'
    with open(file_path, 'r') as file:
        content = file.read().strip()
    # Replace CRLF before LF: the other order consumes the '\n' of every
    # '\r\n' pair first, so the CRLF replacement could never match and a
    # stray '\r' would be left in front of the marker.
    return content.replace('\r\n', newline_marker).replace('\n', newline_marker)
# Function to update the typesettings field for all groups containing false-robots.txt
def update_typesettings(cursor, template_content, placeholder_pattern, execute_updates, specific_hostname=None):
    """Rewrite the ``false-robots.txt`` entry of ``group_.typesettings``.

    Selects every group whose ``typesettings`` contains ``false-robots.txt``
    (joined to its virtual host names), builds the new value by substituting
    the host name for ``placeholder_pattern`` in ``template_content`` and
    replaces the existing ``false-robots.txt=...`` entry with it.

    Args:
        cursor: Open DB-API cursor (psycopg2); ``mogrify`` is used to render
            the statements.
        template_content: Flattened robots template.
        placeholder_pattern: Literal token in the template to replace with
            the host name.
        execute_updates: When true, run each UPDATE and log both the executed
            statement and a statement restoring the previous value. When
            false, only print the UPDATE statements.
        specific_hostname: If given, restrict the selection to this host name.

    Returns:
        None. Any exception is caught, printed and logged; it is not
        re-raised. Committing is left to the caller.
    """
    # NOTE(review): the value is taken to end at the first '¶' character.
    # Confirm this matches the separator actually stored in typesettings;
    # if entries are newline-separated this pattern runs to the end of the field.
    entry_pattern = re.compile(r'false-robots\.txt=[^¶]*')
    try:
        # The LIKE pattern is bound as a parameter: once arguments are passed
        # to execute(), psycopg2 treats every bare '%' in the SQL text as a
        # placeholder, so a literal '%false-robots.txt%' breaks the hostname
        # filtered query.
        query = """
            SELECT g.groupid, g.typesettings, v.hostname
            FROM public.group_ g
            JOIN public.layoutset l ON g.groupid = l.groupid
            JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
            WHERE g.typesettings LIKE %s
        """
        params = ['%false-robots.txt%']
        if specific_hostname:
            query += " AND v.hostname = %s"
            params.append(specific_hostname)
        cursor.execute(query, params)

        update_query = "UPDATE public.group_ SET typesettings = %s WHERE groupid = %s;"
        for groupid, current_typesettings, hostname in cursor.fetchall():
            # Replace the placeholder in the template with the hostname value
            new_entry = 'false-robots.txt=' + template_content.replace(placeholder_pattern, hostname)
            # A function replacement inserts the text verbatim; a replacement
            # *string* would have its backslashes interpreted by re.sub
            # (escapes / group references) and fail on e.g. "\d" or "\1".
            updated_typesettings = entry_pattern.sub(lambda _match: new_entry, current_typesettings)

            if execute_updates:
                cursor.execute(update_query, (updated_typesettings, groupid))
                executed_query = cursor.mogrify(update_query, (updated_typesettings, groupid)).decode('utf-8')
                restore_query = cursor.mogrify(update_query, (current_typesettings, groupid)).decode('utf-8')
                logging.info(f"Executed: \n{executed_query}\n")
                logging.info(f"### Restore command: \n{restore_query}\n")
            else:
                print(cursor.mogrify(update_query, (updated_typesettings, groupid)).decode('utf-8'))
    except Exception as e:
        print(f"Error during update: {e}")
        logging.error(f"Error during update: {e}")
# Function to print the list of current vhosts
def print_current_vhosts(cursor):
    """Print one line per host name stored in ``public.virtualhost``.

    Args:
        cursor: Open DB-API cursor used to run the query.

    Returns:
        None. A failure is reported on stdout instead of being raised.
    """
    vhost_sql = """
        SELECT v.hostname
        FROM public.virtualhost v
    """
    try:
        cursor.execute(vhost_sql)
        records = cursor.fetchall()
        print("List of current vhosts:")
        for record in records:
            hostname = record[0]
            print(hostname)
    except Exception as error:
        print(f"Error retrieving vhosts: {error}")
# Function to print the list of current false-robots.txt values
def print_current_robots(cursor):
    """Print the current ``false-robots.txt`` value next to each host name.

    Rows whose ``typesettings`` do not contain a ``false-robots.txt=`` entry
    are skipped silently.

    Args:
        cursor: Open DB-API cursor used to run the query.

    Returns:
        None. A failure is reported on stdout instead of being raised.
    """
    robots_sql = """
        SELECT g.typesettings, v.hostname
        FROM public.group_ g
        JOIN public.layoutset l ON g.groupid = l.groupid
        JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
        WHERE g.typesettings LIKE '%false-robots.txt%'
    """
    try:
        cursor.execute(robots_sql)
        records = cursor.fetchall()
        print("List of current false-robots.txt values and related hostnames:")
        for typesettings, hostname in records:
            found = re.search(r'false-robots\.txt=([^¶]*)', typesettings)
            if found is None:
                continue
            print(f"Hostname: {hostname} - Robots: {found.group(1)}")
    except Exception as error:
        print(f"Error retrieving false-robots.txt values: {error}")
# Function to save the current vhosts to files
def save_current_vhosts(cursor, output_dir):
    """Dump the robots value and the full typesettings of every host to files.

    For each selected row two files are written into ``output_dir`` (created
    if missing): ``robots_<hostname>.txt`` with the current
    ``false-robots.txt`` value (left empty when no entry is found) and
    ``typesettings_<hostname>.txt`` with the complete ``typesettings`` text.

    Args:
        cursor: Open DB-API cursor used to run the query.
        output_dir: Directory that receives the files.

    Returns:
        None. A failure is reported on stdout instead of being raised.
    """
    backup_sql = """
        SELECT v.hostname, g.typesettings
        FROM public.virtualhost v
        JOIN public.layoutset l ON v.layoutsetid = l.layoutsetid
        JOIN public.group_ g ON l.groupid = g.groupid
        WHERE g.typesettings LIKE '%false-robots.txt%'
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
        cursor.execute(backup_sql)
        for hostname, typesettings in cursor.fetchall():
            # Robots value only; the file is created even when nothing matches.
            robots_path = os.path.join(output_dir, f"robots_{hostname}.txt")
            with open(robots_path, 'w') as robots_file:
                found = re.search(r'false-robots\.txt=([^¶]*)', typesettings)
                if found is not None:
                    robots_file.write(found.group(1))
            print(f"Saved {hostname} robots.txt to {robots_path}")

            # Complete typesettings text, usable as a backup.
            settings_path = os.path.join(output_dir, f"typesettings_{hostname}.txt")
            with open(settings_path, 'w') as settings_file:
                settings_file.write(typesettings)
            print(f"Saved {hostname} typesettings to {settings_path}")
    except Exception as error:
        print(f"Error saving vhosts: {error}")
# Function to print the differences between current and new false-robots.txt values
def print_differences(cursor, template_content, placeholder_pattern, specific_hostname=None):
    """Print hosts whose stored ``false-robots.txt`` differs from the template.

    For every selected row the current ``false-robots.txt`` value is compared
    with ``template_content`` after ``placeholder_pattern`` has been replaced
    by the host name. Only differing hosts are printed; rows without a
    ``false-robots.txt=`` entry are skipped.

    Args:
        cursor: Open DB-API cursor used to run the query.
        template_content: Flattened robots template.
        placeholder_pattern: Literal token in the template to replace with
            the host name.
        specific_hostname: If given, restrict the comparison to this host name.

    Returns:
        None. A failure is reported on stdout instead of being raised.
    """
    try:
        # The LIKE pattern is bound as a parameter: once arguments are passed
        # to execute(), psycopg2 treats every bare '%' in the SQL text as a
        # placeholder, so a literal '%false-robots.txt%' breaks the hostname
        # filtered query.
        query = """
            SELECT g.typesettings, v.hostname
            FROM public.group_ g
            JOIN public.layoutset l ON g.groupid = l.groupid
            JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
            WHERE g.typesettings LIKE %s
        """
        params = ['%false-robots.txt%']
        if specific_hostname:
            query += " AND v.hostname = %s"
            params.append(specific_hostname)
        cursor.execute(query, params)
        rows = cursor.fetchall()

        print("Differences between current and new false-robots.txt values:")
        for current_typesettings, hostname in rows:
            match = re.search(r'false-robots\.txt=([^¶]*)', current_typesettings)
            if not match:
                continue
            current_false_robots = match.group(1)
            new_false_robots = template_content.replace(placeholder_pattern, hostname)
            if current_false_robots != new_false_robots:
                print(f"Hostname: {hostname}")
                print(f"Current: {current_false_robots}")
                print(f"New: {new_false_robots}")
                print("-" * 40)
    except Exception as e:
        print(f"Error during difference check: {e}")
# Main function
def main():
    """Command-line entry point.

    Parses the options, picks exactly one action and runs it against the
    database. When several action flags are given the first one in this order
    wins: ``--list-vhosts``, ``--list-robots``, ``--save-vhosts``,
    ``--print-differences``, ``--execute``, ``--print-updates``. Without any
    action flag the help text is printed and nothing else happens.

    Connection settings come from the ``--db-*`` options; any that are missing
    are taken from the ``jdbc.default.*`` entries of the configuration file.

    Raises:
        OSError: If a required template or configuration file cannot be read.
        KeyError: If the configuration file is needed but lacks a
            ``jdbc.default.*`` entry.
        psycopg2.Error: If the database connection fails.
    """
    parser = argparse.ArgumentParser(
        description="Update the false-robots.txt field in the group_ table.",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=f"""
Examples of usage:
1. Use a configuration file for database parameters and print the update queries:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE}
2. Specify database parameters directly from the command line and print the update queries:
   python3 update_robots.py --db-host postgres --db-port 5432 --db-name liferay_db --db-user infra_bundle_dev --db-password pass_db --template-file {DEFAULT_TEMPLATE_FILE}
3. Execute the update queries:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --execute
4. Print the list of current vhosts:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --list-vhosts
5. Print the list of current false-robots.txt values and related hostnames:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --list-robots
6. Save the current vhosts to files:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --save-vhosts --output-dir {DEFAULT_OUTPUT_DIR}
7. Update the false-robots.txt for a specific hostname:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --hostname specific.hostname.com
8. Print the differences between current and new false-robots.txt values:
   python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --print-differences
""")
    parser.add_argument("--config-file", default=DEFAULT_CONFIG_FILE, help=f"Path to the configuration file with database parameters (default: {DEFAULT_CONFIG_FILE})")
    parser.add_argument("--db-host", help="Database host")
    parser.add_argument("--db-port", type=int, help="Database port")
    parser.add_argument("--db-name", help="Database name")
    parser.add_argument("--db-user", help="Database user")
    parser.add_argument("--db-password", help="Database password")
    parser.add_argument("--template-file", default=DEFAULT_TEMPLATE_FILE, help=f"Path to the template file for false-robots.txt (default: {DEFAULT_TEMPLATE_FILE})")
    parser.add_argument("--placeholder", default=DEFAULT_PLACEHOLDER, help=f"Placeholder pattern to replace in the template file (default: {DEFAULT_PLACEHOLDER})")
    parser.add_argument("--execute", action="store_true", help="Execute the update queries (default is to print the queries)")
    parser.add_argument("--print-updates", action="store_true", help="Print only (not execute) the update queries (default is to print the queries)")
    parser.add_argument("--list-vhosts", action="store_true", help="Print the list of current vhosts")
    parser.add_argument("--list-robots", action="store_true", help="Print the list of current false-robots.txt values and related hostnames")
    parser.add_argument("--save-vhosts", action="store_true", help=f"Save the current vhosts to files (default directory: {DEFAULT_OUTPUT_DIR})")
    parser.add_argument("--output-dir", default=DEFAULT_OUTPUT_DIR, help=f"Output directory to save the vhosts (default: {DEFAULT_OUTPUT_DIR})")
    parser.add_argument("--hostname", help="Specific hostname to update")
    parser.add_argument("--print-differences", action="store_true", help="Print the differences between current and new false-robots.txt values")
    args = parser.parse_args()

    # Pick the action first so that nothing (template, config, database) is
    # touched when there is nothing to do. Order defines precedence.
    # NOTE(review): the help text and examples 1, 2 and 7 say the default is
    # to print the queries, but without an action flag only the help is
    # shown. Behaviour is kept as-is; confirm which one is intended.
    if args.list_vhosts:
        action = "list_vhosts"
    elif args.list_robots:
        action = "list_robots"
    elif args.save_vhosts:
        action = "save_vhosts"
    elif args.print_differences:
        action = "print_differences"
    elif args.execute:
        action = "execute"
    elif args.print_updates:
        action = "print_updates"
    else:
        parser.print_help()
        return

    # Only the template-based actions need the template file; listing and
    # saving must work even when it does not exist.
    template_content = None
    if action in ("print_differences", "execute", "print_updates"):
        template_content = read_template_file(args.template_file)

    # Command-line values win; the configuration file only fills the gaps and
    # is not read at all when every parameter was given explicitly.
    db_host = args.db_host or None
    db_port = args.db_port or None
    db_name = args.db_name or None
    db_user = args.db_user or None
    db_password = args.db_password or None
    if args.config_file and not all((db_host, db_port, db_name, db_user, db_password)):
        db_config = read_db_config(args.config_file)
        cfg_host, cfg_port, cfg_name = _parse_jdbc_url(db_config['jdbc.default.url'])
        db_host = db_host or cfg_host
        db_port = db_port or cfg_port
        db_name = db_name or cfg_name
        db_user = db_user or db_config['jdbc.default.username']
        db_password = db_password or db_config['jdbc.default.password']

    # Configure logging
    logging.basicConfig(filename=LOG_FILE, level=logging.INFO,
                        format='%(asctime)s %(message)s')

    # Connect to the database
    conn = psycopg2.connect(
        host=db_host,
        port=db_port,
        dbname=db_name,
        user=db_user,
        password=db_password
    )
    try:
        cursor = conn.cursor()
        try:
            if action == "list_vhosts":
                print_current_vhosts(cursor)
            elif action == "list_robots":
                print_current_robots(cursor)
            elif action == "save_vhosts":
                save_current_vhosts(cursor, args.output_dir)
            elif action == "print_differences":
                print_differences(cursor, template_content, args.placeholder, args.hostname)
            elif action == "execute":
                update_typesettings(cursor, template_content, args.placeholder, True, args.hostname)
                print("robots.txt updated. Clean the Liferay database cache on all the instances (Configuration => Server Administration => Clear the database cache)")
            else:
                update_typesettings(cursor, template_content, args.placeholder, False, args.hostname)
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, also when an action raised; closing
        # without a commit discards any uncommitted work.
        conn.close()


def _parse_jdbc_url(jdbc_url):
    """Split ``jdbc:<driver>://host[:port]/dbname[?params]`` into its parts.

    Returns:
        tuple: ``(host, port, dbname)``; ``port`` is ``None`` when the URL
        does not specify one, and any ``?params`` suffix is dropped.
    """
    from urllib.parse import urlsplit  # only needed for this helper

    # java.util.Properties.store() writes ':' and '=' as '\:' and '\='.
    url = jdbc_url.replace("\\:", ":").replace("\\=", "=")
    if url.startswith("jdbc:"):
        url = url[len("jdbc:"):]
    parts = urlsplit(url)
    return parts.hostname, parts.port, parts.path.lstrip("/")
if __name__ == "__main__":
main()