#!/bin/bash
#
# Terminate runaway rsession worker processes.
#
# For every user found in the process table with at least one live rsession
# process, count that user's rsession processes and sum their resident memory.
# When either figure exceeds its limit (number of CPUs minus one, or total
# memory minus SPARE_MEM), send SIGTERM to those of the user's rsession
# processes that are not the parent of another of the user's rsession
# processes. All activity is reported through logger(1).
#
# The user name is taken from the third word of the command line, i.e. the
# script assumes the form "<rsession binary> -u <user> ...".
#
# TODO:
# - print nagios friendly output into a file
# - kill rsession processes older than N days
#

set -e
set -u
set -o pipefail

CMD_NAME=$0
USER_N=
USER_PROCS_TO_KILL=
# Memory that must stay free, in kB (the unit of MemTotal in /proc/meminfo
# and of the rss column printed by ps).
SPARE_MEM=1048576
OUT_DIR=
USER_PROCS_LIST=
USER_PROCS_PARENTS=

#######################################
# Send one message to syslog, prefixed with the script name.
# The text is passed as a single literal argument: user names come from other
# processes' command lines and must never be re-parsed by the shell.
# Globals:   CMD_NAME (read)
# Arguments: message words
#######################################
log() {
  logger -- "${CMD_NAME}: $*"
}

#######################################
# Remove the scratch directory, if one has been created.
# Globals: OUT_DIR (read)
#######################################
cleanup() {
  if [[ -n "$OUT_DIR" ]]; then
    rm -rf -- "$OUT_DIR"
  fi
}

#######################################
# Print one "pid ppid rss user" line per live rsession process.
# awk does the filtering so that "nothing matched" is not reported as a
# pipeline failure (grep would exit 1 and trip set -e / pipefail). The
# !/grep/ test keeps the original exclusion and also hides this awk process,
# whose own command line contains the word "rsession".
# Outputs: the matching processes on stdout, possibly nothing
#######################################
snapshot_rsessions() {
  ps -eo pid=,ppid=,rss=,args= \
    | awk '/rsession/ && !/grep/ && !/defunct/ && $6 != "" {
        print $1, $2, $3, $6
      }'
}

#######################################
# Work out which rsession processes of the current user should be killed:
# every PID of that user that is not the parent of another of the user's
# rsession processes.
# Globals: USER_N, USER_PROCS_LIST, USER_PROCS_PARENTS (read)
#          USER_PROCS_TO_KILL (written)
# Returns: 0 on success, grep's status if the filtering itself fails
#######################################
find_rogue_processes() {
  local snapshot
  local rc=0

  log "find_rogue_processes for user $USER_N"

  snapshot=$(snapshot_rsessions)
  awk -v user="$USER_N" '$4 == user { print $2 }' <<<"$snapshot" \
    | sort -u > "$USER_PROCS_PARENTS"
  awk -v user="$USER_N" '$4 == user { print $1 }' <<<"$snapshot" \
    | sort -u > "$USER_PROCS_LIST"

  # -x -F: compare whole lines literally, so parent 12 no longer removes
  # PID 1234. Exit status 1 only means "every PID was a parent".
  USER_PROCS_TO_KILL=$(
    grep -vxFf "$USER_PROCS_PARENTS" "$USER_PROCS_LIST"
  ) || rc=$?
  if (( rc > 1 )); then
    return "$rc"
  fi
}

#######################################
# Send SIGTERM to every PID selected by find_rogue_processes.
# Globals: USER_N, USER_PROCS_TO_KILL (read)
#######################################
exterminate() {
  local pid

  log "exterminate killing user $USER_N processes"

  # Word splitting is intended here: the variable holds a list of PIDs.
  for pid in $USER_PROCS_TO_KILL; do
    # A process may have exited since the snapshot; that must not stop the
    # run for the remaining PIDs and users.
    kill -15 "$pid" 2>/dev/null || log "could not signal process $pid"
  done
}

# Install the handlers before any scratch data exists so that no exit path
# leaks it. The signal handler really exits; the EXIT trap then cleans up.
trap cleanup EXIT
trap 'log "trap intercepted, exiting."; exit 1' SIGHUP SIGINT SIGTERM

SNAPSHOT=$(snapshot_rsessions)
# sort -u (not bare uniq) so each user is handled once even when their
# processes are not adjacent in the ps output.
USERS_SESSIONS=$(awk 'NF { print $4 }' <<<"$SNAPSHOT" | sort -u)

if [[ -z "$USERS_SESSIONS" ]]; then
  log "There are no active sessions"
  exit 0
fi

OUT_DIR=$(mktemp -d -t kill-rogue-jobs.XXXXXXXXXX)
USER_PROCS_LIST=$OUT_DIR/proclist
USER_PROCS_PARENTS=$OUT_DIR/parents

# Anchored so that only the per-CPU "processor : N" lines are counted.
NUM_CPUS=$(grep -c '^processor' /proc/cpuinfo)
ALLOWED_THREADS=$(( NUM_CPUS - 1 ))
TOTAL_MEM=$(awk '/^MemTotal:/ { print $2 }' /proc/meminfo)
ALLOWED_USED_MEM=$(( TOTAL_MEM - SPARE_MEM ))

for USER_N in $USERS_SESSIONS; do
  USER_PROCS=$(
    awk -v user="$USER_N" '$4 == user { n++ } END { print n + 0 }' \
      <<<"$SNAPSHOT"
  )
  # Summed in awk so that an empty selection yields 0 rather than an empty
  # string, which would break the numeric comparison below.
  USER_MEM=$(
    awk -v user="$USER_N" \
      '$4 == user { sum += $3 } END { printf "%.0f\n", sum }' \
      <<<"$SNAPSHOT"
  )

  if (( USER_PROCS > ALLOWED_THREADS || USER_MEM > ALLOWED_USED_MEM )); then
    if (( USER_PROCS > ALLOWED_THREADS )); then
      log "user $USER_N is running too many processes"
    fi
    if (( USER_MEM > ALLOWED_USED_MEM )); then
      log "user $USER_N is using too much memory"
    fi
    find_rogue_processes
    exterminate
  else
    log "we do not need to kill any processes for user $USER_N"
  fi
done

exit 0