ansible-role-rstudio-server/files/kill-rogue-jobs

75 lines
2.3 KiB
Plaintext
Raw Permalink Normal View History

#!/bin/bash
#
# Looks at the rsession processes of every user with an active session and,
# when a user runs more processes or uses more memory than allowed, sends
# SIGTERM to the processes selected by find_rogue_processes.
#
# TODO:
# - print nagios friendly output into a file
# - kill rsession processes older than N days
set -e
set -o pipefail

# Prefix of every syslog message written by this script.
CMD_NAME=$0
# User currently being examined; assigned by the main loop.
USER_N=
# PIDs selected for termination; assigned by find_rogue_processes.
USER_PROCS_TO_KILL=
# Amount of memory that must stay free. It is subtracted from the MemTotal
# value of /proc/meminfo, so it is expressed in the same unit (kB).
SPARE_MEM=1048576

# One user name per line, taken from the 10th column of `ps -edaf`
# (assumes rsession is started as `rsession -u <user> ...` -- TODO confirm).
#
# A single awk replaces the former grep chain: with `set -e` and `pipefail`
# a grep that matched nothing made the whole assignment fail and aborted the
# script before the "no active sessions" message could be logged.
# /[r]session/ keeps awk's own command line from matching itself, and
# `sort -u` removes duplicates even when a user's processes are not adjacent
# in the ps output (plain `uniq` only collapses adjacent lines).
USERS_SESSIONS=$( ps -edaf \
  | awk '/[r]session/ && !/defunct/ && !/grep/ && NF >= 10 { print $10 }' \
  | sort -u )

if [[ -z "$USERS_SESSIONS" ]] ; then
  logger -- "$CMD_NAME: There are no active sessions"
  exit 0
fi

# Scratch directory holding the intermediate PID lists.
OUT_DIR=$( mktemp -d -t kill-rogue-jobs.XXXXXXXXXX )
USER_PROCS_LIST=$OUT_DIR/proclist
USER_PROCS_PARENTS=$OUT_DIR/parents

# Remove the scratch directory on every exit path, including an abort caused
# by `set -e`. cleanup is looked up when the trap fires, so it may be defined
# further down in the file.
trap cleanup EXIT
# On a signal, log and really exit; the EXIT trap then performs the cleanup.
# Previously the handler removed the scratch directory and let the script
# carry on without it.
trap 'logger -- "$CMD_NAME: trap intercepted, exiting." ; exit 1' SIGHUP SIGINT SIGTERM

# Remove the scratch directory named by OUT_DIR together with its contents.
#
# Globals read: OUT_DIR
# Returns:      0 when OUT_DIR is unset, empty, already gone or removed;
#               the status of rm otherwise.
# Safe to call more than once.
function cleanup() {
  # Never hand an empty path to rm -r.
  if [[ -n "${OUT_DIR:-}" ]] ; then
    rm -rf -- "$OUT_DIR"
  fi
}
# Select which rsession processes of $USER_N should be terminated.
#
# Globals read:    CMD_NAME, USER_N, USER_PROCS_PARENTS, USER_PROCS_LIST
# Globals written: USER_PROCS_TO_KILL (one PID per line, possibly empty)
# Files written:   $USER_PROCS_PARENTS, $USER_PROCS_LIST
#
# The result is every rsession PID reported by `pgrep -U $USER_N` that is not
# the parent of one of the user's rsession processes listed by ps.
function find_rogue_processes() {
  logger -- "$CMD_NAME: find_rogue_processes for user $USER_N"

  # Column 3 of `ps -edaf` is the parent PID. A line belongs to the user when
  # one of its fields equals the user name; /[r]session/ keeps awk's own
  # command line out of the result. awk does not fail on "no match", so the
  # pipeline cannot abort the script under `set -e` / `pipefail`.
  ps -edaf \
    | awk -v user="$USER_N" '
        /[r]session/ && !/grep/ {
          for (i = 1; i <= NF; i++) if ($i == user) { print $3; next }
        }' \
    | sort -u > "$USER_PROCS_PARENTS"

  # pgrep exits 1 when nothing matches; an empty list is a valid result.
  pgrep -U "$USER_N" rsession > "$USER_PROCS_LIST" || true

  # -x -F compare whole lines literally, so parent PID 12 no longer removes
  # 123 or 1234 from the list as the former substring match did.
  # grep exits 1 when every line is filtered out, which is not an error here.
  USER_PROCS_TO_KILL=$( grep -v -x -F -f "$USER_PROCS_PARENTS" "$USER_PROCS_LIST" || true )
}
# Send SIGTERM to every PID listed in USER_PROCS_TO_KILL.
#
# Globals read: CMD_NAME, USER_N, USER_PROCS_TO_KILL (whitespace-separated PIDs)
function exterminate() {
  local pid

  logger -- "$CMD_NAME: exterminate killing user $USER_N processes"

  # Word splitting is intentional here: the variable holds one PID per line.
  for pid in $USER_PROCS_TO_KILL ; do
    # A process may have exited since the list was built. Under `set -e` an
    # unguarded failing kill would abort the run and skip the remaining PIDs
    # and users, so the failure is logged instead.
    kill -15 "$pid" \
      || logger -- "$CMD_NAME: could not signal process $pid of user $USER_N"
  done
}
# Install the cleanup handler before any work is done so the scratch
# directory is removed on every exit path, including an abort by `set -e`.
trap cleanup EXIT

# Count only the per-CPU "processor" lines; an unanchored match also counts
# model-name lines such as "Common KVM processor".
NUM_CPUS=$( grep -c '^processor' /proc/cpuinfo )
# A user may run at most one process less than there are CPUs.
ALLOWED_THREADS=$(( NUM_CPUS - 1 ))
# MemTotal is reported in kB, the same unit as the rss column of ps.
TOTAL_MEM=$( awk '/^MemTotal:/ { print $2 }' /proc/meminfo )
ALLOWED_USED_MEM=$(( TOTAL_MEM - SPARE_MEM ))

# Word splitting is intentional: USERS_SESSIONS holds one user name per line.
for USER_N in $USERS_SESSIONS ; do
  # Number of rsession processes of this user. A line counts when one of its
  # fields equals the user name, so user "al" no longer picks up the
  # processes of "alice". awk prints 0 instead of failing when nothing
  # matches, which keeps `set -e` / `pipefail` from aborting the loop.
  USER_PROCS=$( ps -edaf \
    | awk -v user="$USER_N" '
        /[r]session/ && !/grep/ {
          for (i = 1; i <= NF; i++) if ($i == user) { n++; next }
        }
        END { print n + 0 }' )

  # Sum of the rss column (second field) over the same set of processes.
  USER_MEM=$( ps -eo pid,rss,vsz,args \
    | awk -v user="$USER_N" '
        /[r]session/ && !/grep/ {
          for (i = 1; i <= NF; i++) if ($i == user) { kb += $2; next }
        }
        END { printf "%d\n", kb }' )

  over_limit=0
  if (( USER_PROCS > ALLOWED_THREADS )) ; then
    logger -- "$CMD_NAME: user $USER_N is running too many processes"
    over_limit=1
  fi
  if (( USER_MEM > ALLOWED_USED_MEM )) ; then
    logger -- "$CMD_NAME: user $USER_N is using too much memory"
    over_limit=1
  fi

  if (( over_limit )) ; then
    find_rogue_processes
    exterminate
  else
    logger -- "$CMD_NAME: we do not need to kill any processes for user $USER_N"
  fi
done

# The EXIT trap installed above removes the scratch directory.
exit 0