import argparse
import logging
import os
import re
from urllib.parse import urlsplit

import psycopg2
# Default values.
# These strings are used verbatim as file-system paths and as the literal
# placeholder text, so they must not carry leading or trailing whitespace.
DEFAULT_CONFIG_FILE = "/home/life/Portal-Bundle/portal-setup-wizard.properties"
DEFAULT_TEMPLATE_FILE = "template-robots.txt"
DEFAULT_PLACEHOLDER = "{{HOSTNAME}}"
DEFAULT_OUTPUT_DIR = "currents_robots"
# File that receives the log records once logging is configured in main().
LOG_FILE = "update_robots.log"
# Function to read database connection parameters from the configuration file
def read_db_config(config_file_path):
    """Parse a ``key=value`` properties file into a dictionary.

    Blank lines, comment lines (first non-blank character ``#``) and lines
    without ``=`` are ignored. Only the first ``=`` separates the key from the
    value, so a value may itself contain ``=`` (e.g. a URL with options).

    Args:
        config_file_path: Path of the properties file to read.

    Returns:
        dict mapping each stripped key to its stripped value. When a key
        appears more than once, the last occurrence wins.

    Raises:
        OSError: If the file cannot be opened.
    """
    config = {}
    with open(config_file_path, "r") as file:
        for raw_line in file:
            line = raw_line.strip()
            # Test the *stripped* line so indented comments are skipped too.
            if not line or line.startswith("#"):
                continue
            key, separator, value = line.partition("=")
            if not separator:
                continue
            config[key.strip()] = value.strip()
    return config
# Function to read the content of the template file
def read_template_file(file_path):
    """Return the template collapsed onto a single line.

    The file is opened in text mode, so line endings (LF, CRLF or CR) arrive
    already normalized to a single newline character; one replacement is
    therefore enough. Leading and trailing whitespace is removed first, then
    every remaining newline becomes the ``_SAFE_NEWLINE_CHARACTER_`` marker.

    Args:
        file_path: Path of the template file.

    Returns:
        The template text as one line, newlines replaced by the marker.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, "r") as file:
        content = file.read()
    return content.strip().replace("\n", "_SAFE_NEWLINE_CHARACTER_")
# Function to update the typesettings field for all groups containing false-robots.txt
def update_typesettings(cursor, template_content, placeholder_pattern, execute_updates, specific_hostname=None):
    """Rewrite the ``false-robots.txt`` entry of every matching group.

    For each (group, virtual host) row whose typesettings contain
    ``false-robots.txt``, the placeholder in the template is replaced by the
    row's hostname and the result is substituted for the current
    ``false-robots.txt=...`` entry.

    Args:
        cursor: Open database cursor; ``execute``, ``fetchall`` and
            ``mogrify`` are used.
        template_content: Single-line template text for the new value.
        placeholder_pattern: Literal text (not a regex) in the template that
            is replaced by the hostname.
        execute_updates: When true, run each UPDATE and log both the executed
            statement and a statement restoring the previous value. When
            false, only print the UPDATE statement.
        specific_hostname: Optional hostname; when given, only rows for that
            virtual host are processed.

    Returns:
        None. Any exception is caught, printed and logged rather than raised.
    """
    try:
        # Query to extract the necessary data with a join.
        # The LIKE pattern is passed as a parameter: once parameters are
        # supplied the driver parses every '%' in the SQL text, so a literal
        # '%false-robots.txt%' in the query would be read as a placeholder.
        query = """
            SELECT g.groupid, g.typesettings, v.hostname
            FROM public.group_ g
            JOIN public.layoutset l ON g.groupid = l.groupid
            JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
            WHERE g.typesettings LIKE %s
        """
        params = ["%false-robots.txt%"]
        if specific_hostname:
            query += " AND v.hostname = %s"
            params.append(specific_hostname)
        cursor.execute(query, tuple(params))
        rows = cursor.fetchall()

        # NOTE(review): the value is taken to run up to the next '¶' character
        # (or the end of the string) - confirm this is the separator actually
        # used in the stored typesettings. The negated class already matches
        # newlines, so no DOTALL flag is needed.
        robots_entry = re.compile(r'false-robots\.txt=[^¶]*')
        update_query = "UPDATE public.group_ SET typesettings = %s WHERE groupid = %s;"

        for groupid, current_typesettings, hostname in rows:
            # Replace the placeholder in the template with the hostname value
            new_false_robots = template_content.replace(placeholder_pattern, hostname)
            replacement = f"false-robots.txt={new_false_robots}"
            # Update the value of false-robots.txt. A callable replacement is
            # inserted verbatim; a plain string would have backslashes and
            # group references coming from the template interpreted by re.
            updated_typesettings = robots_entry.sub(lambda _match: replacement, current_typesettings)

            update_params = (updated_typesettings, groupid)
            if execute_updates:
                cursor.execute(update_query, update_params)
                executed_query = cursor.mogrify(update_query, update_params).decode('utf-8')
                restore_query = cursor.mogrify(update_query, (current_typesettings, groupid)).decode('utf-8')
                logging.info(f"Executed:\n{executed_query}\n")
                logging.info(f"### Restore command:\n{restore_query}\n")
            else:
                print(cursor.mogrify(update_query, update_params).decode('utf-8'))
    except Exception as e:
        print(f"Error during update: {e}")
        logging.error(f"Error during update: {e}")
# Function to print the list of current vhosts
def print_current_vhosts(cursor):
    """Print the hostname of every row in ``public.virtualhost``.

    Args:
        cursor: Open database cursor; ``execute`` and ``fetchall`` are used.

    Returns:
        None. A header line is printed, then one hostname per line. Any
        exception is caught and reported on standard output, not raised.
    """
    try:
        cursor.execute("""
            SELECT v.hostname
            FROM public.virtualhost v
        """)
        rows = cursor.fetchall()
        print("List of current vhosts:")
        for row in rows:
            print(row[0])
    except Exception as e:
        print(f"Error retrieving vhosts: {e}")
# Function to print the list of current false-robots.txt values
def print_current_robots(cursor):
    """Print the current ``false-robots.txt`` value for each virtual host.

    Selects the typesettings of every group whose typesettings contain
    ``false-robots.txt`` together with the hostname joined through
    ``layoutset``/``virtualhost``, and prints one line per row in which the
    ``false-robots.txt=...`` entry can be extracted.

    Args:
        cursor: Open database cursor; ``execute`` and ``fetchall`` are used.

    Returns:
        None. Any exception is caught and reported on standard output.
    """
    try:
        # No parameters are passed here, so the '%' wildcards in the LIKE
        # pattern reach the server unchanged.
        cursor.execute("""
            SELECT g.typesettings, v.hostname
            FROM public.group_ g
            JOIN public.layoutset l ON g.groupid = l.groupid
            JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
            WHERE g.typesettings LIKE '%false-robots.txt%'
        """)
        rows = cursor.fetchall()
        print("List of current false-robots.txt values and related hostnames:")
        for typesettings, hostname in rows:
            # NOTE(review): the value is captured up to the next '¶' (or the
            # end of the string) - confirm this matches the stored format.
            match = re.search(r'false-robots\.txt=([^¶]*)', typesettings)
            if match:
                print(f"Hostname: {hostname} - Robots: {match.group(1)}")
    except Exception as e:
        print(f"Error retrieving false-robots.txt values: {e}")
# Function to save the current vhosts to files
def save_current_vhosts(cursor, output_dir):
    """Dump the current robots value and typesettings of each virtual host.

    For every (hostname, typesettings) row whose typesettings contain
    ``false-robots.txt``, two files are written into ``output_dir``:
    ``robots_<hostname>.txt`` with the extracted ``false-robots.txt`` value
    (left empty when the entry cannot be extracted) and
    ``typesettings_<hostname>.txt`` with the complete typesettings.

    Args:
        cursor: Open database cursor; ``execute`` and ``fetchall`` are used.
        output_dir: Directory receiving the files; created when missing.

    Returns:
        None. Any exception is caught and reported on standard output.
    """
    try:
        os.makedirs(output_dir, exist_ok=True)
        # No parameters are passed here, so the '%' wildcards in the LIKE
        # pattern reach the server unchanged.
        cursor.execute("""
            SELECT v.hostname, g.typesettings
            FROM public.virtualhost v
            JOIN public.layoutset l ON v.layoutsetid = l.layoutsetid
            JOIN public.group_ g ON l.groupid = g.groupid
            WHERE g.typesettings LIKE '%false-robots.txt%'
        """)
        rows = cursor.fetchall()
        for hostname, typesettings in rows:
            file_path = os.path.join(output_dir, f"robots_{hostname}.txt")
            with open(file_path, 'w') as file:
                # NOTE(review): the value is captured up to the next '¶' (or
                # the end of the string) - confirm against the stored format.
                match = re.search(r'false-robots\.txt=([^¶]*)', typesettings)
                if match:
                    file.write(match.group(1))
            print(f"Saved {hostname} robots.txt to {file_path}")

            file_path = os.path.join(output_dir, f"typesettings_{hostname}.txt")
            with open(file_path, 'w') as file:
                file.write(typesettings)
            print(f"Saved {hostname} typesettings to {file_path}")
    except Exception as e:
        print(f"Error saving vhosts: {e}")
# Function to print the differences between current and new false-robots.txt values
def print_differences(cursor, template_content, placeholder_pattern, specific_hostname=None):
    """Print each host whose stored robots value differs from the template.

    For every (typesettings, hostname) row whose typesettings contain
    ``false-robots.txt``, the stored ``false-robots.txt`` value is compared
    with the template after replacing the placeholder by the hostname; rows
    that differ are printed with their current and new values.

    Args:
        cursor: Open database cursor; ``execute`` and ``fetchall`` are used.
        template_content: Single-line template text for the new value.
        placeholder_pattern: Literal text (not a regex) in the template that
            is replaced by the hostname.
        specific_hostname: Optional hostname; when given, only rows for that
            virtual host are compared.

    Returns:
        None. Any exception is caught and reported on standard output.
    """
    try:
        # Query to extract the necessary data with a join.
        # The LIKE pattern is passed as a parameter: once parameters are
        # supplied the driver parses every '%' in the SQL text, so a literal
        # '%false-robots.txt%' in the query would be read as a placeholder.
        query = """
            SELECT g.typesettings, v.hostname
            FROM public.group_ g
            JOIN public.layoutset l ON g.groupid = l.groupid
            JOIN public.virtualhost v ON l.layoutsetid = v.layoutsetid
            WHERE g.typesettings LIKE %s
        """
        params = ["%false-robots.txt%"]
        if specific_hostname:
            query += " AND v.hostname = %s"
            params.append(specific_hostname)
        cursor.execute(query, tuple(params))
        rows = cursor.fetchall()

        print("Differences between current and new false-robots.txt values:")
        for current_typesettings, hostname in rows:
            # NOTE(review): the value is captured up to the next '¶' (or the
            # end of the string) - confirm this matches the stored format.
            match = re.search(r'false-robots\.txt=([^¶]*)', current_typesettings)
            if not match:
                continue
            current_false_robots = match.group(1)
            new_false_robots = template_content.replace(placeholder_pattern, hostname)
            if current_false_robots != new_false_robots:
                print(f"Hostname: {hostname}")
                print(f"Current: {current_false_robots}")
                print(f"New: {new_false_robots}")
                print("-" * 40)
    except Exception as e:
        print(f"Error during difference check: {e}")
def _parse_jdbc_url(jdbc_url):
    """Extract ``(host, port, dbname)`` from a JDBC URL.

    Accepts ``jdbc:postgresql://host[:port]/dbname[?options]``. The port is
    ``None`` when the URL does not specify one, and any ``?options`` suffix is
    kept out of the database name.
    """
    if jdbc_url.startswith("jdbc:"):
        jdbc_url = jdbc_url[len("jdbc:"):]
    parts = urlsplit(jdbc_url)
    return parts.hostname, parts.port, parts.path.lstrip("/") or None


# Main function
def main():
    """Command-line entry point: parse arguments, connect, run one action.

    Exactly one action runs per invocation, chosen in this priority order:
    ``--list-vhosts``, ``--list-robots``, ``--save-vhosts``,
    ``--print-differences``, ``--execute``, ``--print-updates``. When no
    action flag is given the help text is printed and nothing else happens
    (no file is read and no connection is opened).

    Database parameters are taken from the configuration file when it exists
    and are then overridden by any ``--db-*`` option. The template file is
    read only for the actions that use it.

    Returns:
        None.

    Raises:
        SystemExit: Via ``parser.error`` when host, database name or user
            cannot be determined.
    """
    parser = argparse.ArgumentParser(
        description="Update the false-robots.txt field in the group_ table.",
        formatter_class=argparse.RawTextHelpFormatter,
        epilog=f"""
Examples of usage:
  1. Use a configuration file for database parameters and print the update queries:
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --print-updates
  2. Specify database parameters directly from the command line and print the update queries:
     python3 update_robots.py --db-host postgres --db-port 5432 --db-name liferay_db --db-user infra_bundle_dev --db-password pass_db --template-file {DEFAULT_TEMPLATE_FILE} --print-updates
  3. Execute the update queries:
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --execute
  4. Print the list of current vhosts:
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --list-vhosts
  5. Print the list of current false-robots.txt values and related hostnames:
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --list-robots
  6. Save the current vhosts to files:
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --save-vhosts --output-dir {DEFAULT_OUTPUT_DIR}
  7. Print the update queries for a specific hostname (use --execute instead to apply them):
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --hostname specific.hostname.com --print-updates
  8. Print the differences between current and new false-robots.txt values:
     python3 update_robots.py --config-file {DEFAULT_CONFIG_FILE} --template-file {DEFAULT_TEMPLATE_FILE} --print-differences
""")
    parser.add_argument("--config-file", default=DEFAULT_CONFIG_FILE, help=f"Path to the configuration file with database parameters (default: {DEFAULT_CONFIG_FILE})")
    parser.add_argument("--db-host", help="Database host")
    parser.add_argument("--db-port", type=int, help="Database port")
    parser.add_argument("--db-name", help="Database name")
    parser.add_argument("--db-user", help="Database user")
    parser.add_argument("--db-password", help="Database password")
    parser.add_argument("--template-file", default=DEFAULT_TEMPLATE_FILE, help=f"Path to the template file for false-robots.txt (default: {DEFAULT_TEMPLATE_FILE})")
    parser.add_argument("--placeholder", default=DEFAULT_PLACEHOLDER, help=f"Placeholder pattern to replace in the template file (default: {DEFAULT_PLACEHOLDER})")
    parser.add_argument("--execute", action="store_true", help="Execute the update queries")
    parser.add_argument("--print-updates", action="store_true", help="Print only (not execute) the update queries")
    parser.add_argument("--list-vhosts", action="store_true", help="Print the list of current vhosts")
    parser.add_argument("--list-robots", action="store_true", help="Print the list of current false-robots.txt values and related hostnames")
    parser.add_argument("--save-vhosts", action="store_true", help=f"Save the current vhosts to files (default directory: {DEFAULT_OUTPUT_DIR})")
    parser.add_argument("--output-dir", default=DEFAULT_OUTPUT_DIR, help=f"Output directory to save the vhosts (default: {DEFAULT_OUTPUT_DIR})")
    parser.add_argument("--hostname", help="Specific hostname to update")
    parser.add_argument("--print-differences", action="store_true", help="Print the differences between current and new false-robots.txt values")
    args = parser.parse_args()

    # Decide what to do before touching any file or the database, so that a
    # run without an action only prints the help text.
    listing_action = args.list_vhosts or args.list_robots or args.save_vhosts
    template_action = args.print_differences or args.execute or args.print_updates
    if not (listing_action or template_action):
        parser.print_help()
        return

    # Read the template file content only when the selected action uses it
    # (listing actions take priority over the template-based ones).
    template_content = None
    if not listing_action:
        template_content = read_template_file(args.template_file)

    # Initialize database connection parameters with None
    db_host = db_port = db_name = db_user = db_password = None

    # Determine the database connection parameters. A missing configuration
    # file is tolerated here so that the --db-* options can be used alone.
    if args.config_file and os.path.isfile(args.config_file):
        db_config = read_db_config(args.config_file)
        jdbc_url = db_config.get('jdbc.default.url')
        if jdbc_url:
            db_host, db_port, db_name = _parse_jdbc_url(jdbc_url)
        db_user = db_config.get('jdbc.default.username')
        db_password = db_config.get('jdbc.default.password')

    # Command-line options override the configuration file.
    if args.db_host:
        db_host = args.db_host
    if args.db_port:
        db_port = args.db_port
    if args.db_name:
        db_name = args.db_name
    if args.db_user:
        db_user = args.db_user
    if args.db_password:
        db_password = args.db_password

    missing = [
        option
        for option, value in (("--db-host", db_host), ("--db-name", db_name), ("--db-user", db_user))
        if not value
    ]
    if missing:
        parser.error(
            f"Missing database parameter(s) {', '.join(missing)}: not found in "
            f"configuration file '{args.config_file}' and not given on the command line"
        )

    # Configure logging
    logging.basicConfig(filename=LOG_FILE, level=logging.INFO,
                        format='%(asctime)s %(message)s')

    # Connect to the database
    conn = psycopg2.connect(
        host=db_host,
        port=db_port,
        dbname=db_name,
        user=db_user,
        password=db_password
    )
    try:
        cursor = conn.cursor()
        try:
            updates_requested = False
            if args.list_vhosts:
                print_current_vhosts(cursor)
            elif args.list_robots:
                print_current_robots(cursor)
            elif args.save_vhosts:
                save_current_vhosts(cursor, args.output_dir)
            elif args.print_differences:
                print_differences(cursor, template_content, args.placeholder, args.hostname)
            elif args.execute:
                update_typesettings(cursor, template_content, args.placeholder, True, args.hostname)
                updates_requested = True
            else:
                update_typesettings(cursor, template_content, args.placeholder, False, args.hostname)

            conn.commit()
            if updates_requested:
                # NOTE: update_typesettings reports its own failures (print +
                # log) without raising, so this reminder is printed whenever
                # --execute ran and the commit succeeded.
                print("robots.txt updated. Clean the Liferay database cache on all the instances (Configuration => Server Administration => Clear the database cache)")
        finally:
            cursor.close()
    finally:
        # Always release the connection, including on errors.
        conn.close()
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()