ansible-role-R/templates/update_r_packages.sh.j2

357 lines
14 KiB
Django/Jinja

#!/bin/bash
#
# Installs the distribution packages and the R packages (CRAN, versioned CRAN
# and GitHub) named in lists that are fetched over HTTP or kept in a git or
# subversion repository.
# Usage: pass exactly one argument, 'upgrade' or 'install'.
#
# A previous run that still holds the lock after UPDATER_PROCESS_MAX_RUNTIME
# seconds is killed by init_env.
#
export PATH="$PATH:/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
RETVAL=
PARAMS=$#
ACTION=$1
PROCNUM=$$
OLDPROC=
OLDPROC_RUNNING=
LOCKDIR=/var/run
LOCK_FILE=$LOCKDIR/.update_r_pkgs.lock
TMP_FILES_DIR=/var/tmp/r_pkgs_update
# We cannot answer questions. Exported, otherwise the apt-get/dpkg child
# processes never see the setting.
export DEBIAN_FRONTEND=noninteractive
R_CRAN_MIRROR="{{ r_cran_mirror_site }}"
# Defaults for the case where neither the git nor the subversion updater is
# enabled; the template branches below override them.
R_PKGS_FROM_SVN="False"
R_PKGS_FROM_GIT="False"
R_PKGS_DIR=
R_PKGS_BASE_DIR=
{% if r_package_updater_via_git %}
R_PKGS_FROM_SVN="False"
R_PKGS_FROM_GIT="{{ r_package_updater_via_git }}"
R_PKGS_DIR="r-packages-list"
R_PKGS_URL="{{ r_package_updater_git_repo }}"
R_PKGS_BASE_DIR="{{ r_packages_git_base_dir }}"
GIT_UPDATE_STATUS=
{% elif r_package_updater_via_subversion %}
R_PKGS_FROM_SVN="{{ r_package_updater_via_subversion }}"
R_PKGS_FROM_GIT="False"
R_PKGS_DIR=RPackagesManagement
R_PKGS_URL="{{ r_package_updater_subversion_repo }}"
R_PKGS_BASE_DIR="{{ r_packages_svn_base_dir }}"
SVN_UPDATE_STATUS=
{% endif %}
R_PKGS_FILES_PREFIX="{{ r_packages_files_prefix }}"
RSCRIPT_OPTIONS="--slave --no-site-file --no-init-file --no-save --no-restore-history"
# In seconds. 60*60*6=21600s (6h)
UPDATER_PROCESS_MAX_RUNTIME=21600
{% if r_packages_updater_for_github_only %}
UPDATER_DEFAULT_BEHAVIOUR="github_only"
{% else %}
UPDATER_DEFAULT_BEHAVIOUR="everything"
{% endif %}
{% if r_packages_updater_run_once %}
INSTALLER_BEHAVIOUR="once"
{% else %}
INSTALLER_BEHAVIOUR="always"
{% endif %}
# - distribution (debian / EL) packages list format:
# one package per line
DEB_PKGS_SKIP=0
EL_PKGS_SKIP=0
DEBIAN_PKGS_LIST_URL="{{ r_debian_packages_list_url | default('') }}"
EL_PKGS_LIST_URL="{{ r_el_packages_list_url | default('') }}"
PKGS_LIST=
# - R packages list format:
# name[:mirror]
CRAN_PKGS_SKIP=0
R_PKGS_LIST_URL="{{ r_cran_packages_list_url | default('') }}"
R_PKGS_LIST=
# - R versioned packages list format:
# name:version
CRAN_VERSIONED_PKGS_SKIP=0
R_VERSIONED_PKGS_LIST_URL="{{ r_cran_versioned_packages_list_url | default('') }}"
R_VERSIONED_PKGS_LIST=
# - R packages from github list format:
# - owner/package
GITHUB_PKGS_SKIP=0
R_PKGS_FROM_GITHUB_LIST_URL="{{ r_github_packages_list_url | default('') }}"
R_PKGS_GITHUB=
R_PKGS_ALWAYS_UPGRADE_GITHUB="{{ r_github_always_upgrade | default(True) }}"
# On SIGHUP/SIGINT/SIGTERM: log, release the lock file and the temporary files
# through cleanup, then stop. Without the explicit exit bash resumes the script
# after the handler returns, running on without a lock.
trap "logger 'update_r_packages: trap intercepted, exiting.' ; cleanup ; exit 1" SIGHUP SIGINT SIGTERM
# Log the cleanup, then delete the lock file ($LOCK_FILE) and the temporary
# files directory ($TMP_FILES_DIR).
# The paths are quoted so that a value containing blanks or glob characters
# cannot make rm delete unrelated files; empty values are skipped.
function cleanup() {
  logger "update_r_packages: cleaning up"
  if [ -n "$LOCK_FILE" ]; then
    rm -f -- "$LOCK_FILE"
  fi
  if [ -n "$TMP_FILES_DIR" ]; then
    rm -fr -- "$TMP_FILES_DIR"
  fi
}
# Print the usage text on stderr and exit with status 1 unless exactly one
# command line argument was given ($PARAMS holds the argument count).
# Nothing is cleaned up here: this check runs before init_env takes the lock,
# so the lock file and the temporary directory may belong to another instance
# that is still running.
function usage() {
  if [ "${PARAMS:-0}" -ne 1 ]; then
    {
      echo "Need exactly one argument: 'upgrade' or 'install'."
      echo "- 'upgrade' installs new packages and upgrades the already installed ones."
      echo "- 'install' installs new packages."
    } >&2
    exit 1
  fi
}
# Validate the requested action ($ACTION): only 'upgrade' and 'install' are
# accepted. usage handles a wrong argument count; a single argument with an
# unknown value is rejected here, otherwise the script would carry on and
# treat it like 'install'.
function get_args() {
  case "$ACTION" in
    upgrade | install)
      return 0
      ;;
  esac
  usage
  echo "Unknown action '$ACTION': expected 'upgrade' or 'install'." >&2
  exit 1
}
# Abort the run: write a failure notice to syslog, release the lock and the
# temporary files through cleanup, and exit with status 1.
function fail() {
  local -r notice="update_r_packages: Something went wrong, exiting."
  logger "$notice"
  cleanup
  exit 1
}
# Take the run lock and create the temporary files directory.
#
# If $LOCK_FILE exists, the PID stored in it is checked:
# - still alive and older than $UPDATER_PROCESS_MAX_RUNTIME seconds: the old
#   process is killed, its leftovers are removed and this run continues;
# - still alive and younger (or of unknown age): this run exits with status 0;
# - not alive, or the lock content is not a PID: the stale temporary
#   directory is removed and this run continues.
# Globals written: OLDPROC, OLDPROC_RUNNING, OLDPROC_RUNNING_TIME, RETVAL.
function init_env() {
  if [ -f "$LOCK_FILE" ]; then
    OLDPROC=$(tr -d '[:space:]' < "$LOCK_FILE" 2>/dev/null)
    OLDPROC_RUNNING=
    # Look at exactly the recorded PID. Grepping the ps listing for the number
    # also matches longer PIDs and command lines that merely contain it.
    if [[ "$OLDPROC" =~ ^[0-9]+$ ]]; then
      if kill -0 "$OLDPROC" 2>/dev/null || [ -d "/proc/$OLDPROC" ]; then
        OLDPROC_RUNNING=$OLDPROC
      fi
    fi
    if [ -n "$OLDPROC_RUNNING" ]; then
      logger "update_r_packages: pid of the already running process: $OLDPROC_RUNNING"
      OLDPROC_RUNNING_TIME=$(ps -o etimes= -p "$OLDPROC_RUNNING" 2>/dev/null | tr -d '[:space:]')
      # An unknown elapsed time is treated as "still within the limit".
      if [[ "$OLDPROC_RUNNING_TIME" =~ ^[0-9]+$ ]] &&
        [ "$OLDPROC_RUNNING_TIME" -gt "$UPDATER_PROCESS_MAX_RUNTIME" ]; then
        logger "update_r_packages: process $OLDPROC_RUNNING was running for $OLDPROC_RUNNING_TIME seconds. Got stuck, killing it"
        kill -9 "$OLDPROC_RUNNING"
        cleanup
      else
        logger "update_r_packages: another process is running, exiting."
        exit 0
      fi
    else
      logger "update_r_packages: lock file exist but the process not. Continuing."
      if [ -n "$TMP_FILES_DIR" ]; then
        rm -fr -- "$TMP_FILES_DIR"
      fi
    fi
  else
    logger 'update_r_packages: no other jobs running, proceeding.'
  fi
  RETVAL=
  echo "$PROCNUM" > "$LOCK_FILE"
  mkdir -p "$TMP_FILES_DIR"
}
# Download the list at URL $1 into a new temporary file created under
# $TMP_FILES_DIR with the name prefix $2, and print the file path.
# Returns non-zero (and prints nothing) when the file cannot be created or
# the download fails.
function _fetch_pkgs_list() {
  local url=$1
  local prefix=$2
  local target
  target=$(mktemp "$TMP_FILES_DIR/$prefix.XXXXXXX") || return 1
  if ! wget -q -o /dev/null -O "$target" "$url"; then
    rm -f -- "$target"
    return 1
  fi
  printf '%s\n' "$target"
}

# Fetch the package lists over HTTP.
# For every list whose URL is empty, or whose download fails, the matching
# *_SKIP flag is set to 1; otherwise the path of the downloaded file is stored
# in PKGS_LIST, R_PKGS_LIST, R_VERSIONED_PKGS_LIST or R_PKGS_GITHUB.
# NOTE(review): only DEBIAN_PKGS_LIST_URL feeds PKGS_LIST here;
# EL_PKGS_LIST_URL is not consumed by this function - confirm that is intended.
function get_data_files() {
  logger "update_r_packages: get the single files from http."
  # Get the packages list
  if [ -z "$DEBIAN_PKGS_LIST_URL" ]; then
    DEB_PKGS_SKIP=1
    logger "update_r_packages: the debian packages list is not available."
  else
    logger "update_r_packages: getting the debian packages list."
    if ! PKGS_LIST=$(_fetch_pkgs_list "$DEBIAN_PKGS_LIST_URL" rdebs); then
      DEB_PKGS_SKIP=1
      logger "update_r_packages: unable to download the debian packages list."
    fi
  fi
  if [ -z "$R_PKGS_LIST_URL" ]; then
    CRAN_PKGS_SKIP=1
    logger "update_r_packages: the CRAN packages list is not available."
  else
    logger "update_r_packages: getting the R packages list that will be installed from CRAN"
    if ! R_PKGS_LIST=$(_fetch_pkgs_list "$R_PKGS_LIST_URL" rpkgs); then
      CRAN_PKGS_SKIP=1
      logger "update_r_packages: unable to download the CRAN packages list."
    fi
  fi
  if [ -z "$R_VERSIONED_PKGS_LIST_URL" ]; then
    CRAN_VERSIONED_PKGS_SKIP=1
    logger "update_r_packages: the CRAN versioned packages list is not available."
  else
    logger "update_r_packages: getting the R versioned packages list that will be installed from CRAN"
    # The result must land in R_VERSIONED_PKGS_LIST: storing it in R_PKGS_LIST
    # would replace the plain CRAN list and leave the versioned one empty.
    if ! R_VERSIONED_PKGS_LIST=$(_fetch_pkgs_list "$R_VERSIONED_PKGS_LIST_URL" rversionedpkgs); then
      CRAN_VERSIONED_PKGS_SKIP=1
      logger "update_r_packages: unable to download the CRAN versioned packages list."
    fi
  fi
  if [ -z "$R_PKGS_FROM_GITHUB_LIST_URL" ]; then
    GITHUB_PKGS_SKIP=1
    logger "update_r_packages: the Github packages list is not available."
  else
    logger "update_r_packages: getting the R packages list that will be installed from github"
    if ! R_PKGS_GITHUB=$(_fetch_pkgs_list "$R_PKGS_FROM_GITHUB_LIST_URL" rpkgsgithub); then
      GITHUB_PKGS_SKIP=1
      logger "update_r_packages: unable to download the Github packages list."
    fi
  fi
}
# Obtain the package lists from a subversion working copy kept in
# $R_PKGS_BASE_DIR/$R_PKGS_DIR.
# - Existing working copy: 'svn cleanup' then 'svn update'; SVN_UPDATE_STATUS
#   is 0 when the last output line of the update contains "Updated" and
#   non-zero otherwise.
# - No working copy yet: check $R_PKGS_URL out into $R_PKGS_DIR and set
#   SVN_UPDATE_STATUS to 0, because everything in a fresh checkout is new.
# A failing cd or checkout aborts the run through fail.
# Globals written: SVN_UPDATE_STATUS, PKGS_LIST, R_PKGS_LIST,
# R_VERSIONED_PKGS_LIST, R_PKGS_GITHUB.
function get_data_files_from_svn() {
  local working_copy="$R_PKGS_BASE_DIR/$R_PKGS_DIR"
  logger "update_r_packages: files from a SVN repo."
  if [ -d "$working_copy" ]; then
    logger "update_r_packages: SVN update"
    cd "$working_copy" || fail
    svn cleanup >/dev/null
    svn update | tail -1 | grep -q Updated
    SVN_UPDATE_STATUS=$?
  else
    cd "$R_PKGS_BASE_DIR" || fail
    logger "update_r_packages: first SVN checkout."
    # Name the target directory explicitly so that it always matches the path
    # the list files are read from below.
    svn co "$R_PKGS_URL" "$R_PKGS_DIR" >/dev/null 2>&1 || fail
    SVN_UPDATE_STATUS=0
  fi
  PKGS_LIST=$working_copy/${R_PKGS_FILES_PREFIX}r_deb_pkgs.txt
  R_PKGS_LIST=$working_copy/${R_PKGS_FILES_PREFIX}r_cran_pkgs.txt
  R_VERSIONED_PKGS_LIST=$working_copy/${R_PKGS_FILES_PREFIX}r_cran_versioned_pkgs.txt
  R_PKGS_GITHUB=$working_copy/${R_PKGS_FILES_PREFIX}r_github_pkgs.txt
}
# Obtain the package lists from a git clone kept in
# $R_PKGS_BASE_DIR/$R_PKGS_DIR.
# - Existing clone: 'git pull'; GIT_UPDATE_STATUS is 0 when the last output
#   line contains "Already" (nothing new) and non-zero otherwise.
# - No clone yet: clone $R_PKGS_URL into $R_PKGS_DIR and set
#   GIT_UPDATE_STATUS to 1 (new content).
# A failing cd or clone aborts the run through fail.
# Globals written: GIT_UPDATE_STATUS, PKGS_LIST, R_PKGS_LIST,
# R_VERSIONED_PKGS_LIST, R_PKGS_GITHUB.
function get_data_files_from_git() {
  local clone_dir="$R_PKGS_BASE_DIR/$R_PKGS_DIR"
  logger "update_r_packages: files from a git repo."
  if [ -d "$clone_dir" ]; then
    logger "update_r_packages: git pull"
    cd "$clone_dir" || fail
    git pull | tail -1 | grep -q Already
    GIT_UPDATE_STATUS=$?
  else
    cd "$R_PKGS_BASE_DIR" || fail
    logger "update_r_packages: first git clone."
    # Name the target directory explicitly so that it always matches the path
    # the list files are read from below.
    git clone "$R_PKGS_URL" "$R_PKGS_DIR" >/dev/null 2>&1 || fail
    GIT_UPDATE_STATUS=1
  fi
  PKGS_LIST=$clone_dir/${R_PKGS_FILES_PREFIX}r_deb_pkgs.txt
  R_PKGS_LIST=$clone_dir/${R_PKGS_FILES_PREFIX}r_cran_pkgs.txt
  R_VERSIONED_PKGS_LIST=$clone_dir/${R_PKGS_FILES_PREFIX}r_cran_versioned_pkgs.txt
  R_PKGS_GITHUB=$clone_dir/${R_PKGS_FILES_PREFIX}r_github_pkgs.txt
}
{% if ansible_distribution_file_variety == "Debian" %}
# Install the Debian packages listed in $PKGS_LIST (one package per line),
# unless DEB_PKGS_SKIP is non-zero.
# The APT cache is refreshed first when /var/cache/apt/pkgcache.bin was not
# modified within the last 360 minutes. The output of the install and
# autoremove steps goes to /var/log/update_r_debs.log, which is truncated at
# the start of every run.
function distribution_pkgs() {
  local apt_log=/var/log/update_r_debs.log
  # Keep the configuration files already on disk instead of prompting.
  local -a dpkg_opts=(
    -o 'Dpkg::Options::=--force-confdef'
    -o 'Dpkg::Options::=--force-confold'
  )
  if [ "${DEB_PKGS_SKIP:-0}" -ne 0 ]; then
    logger "update_r_packages: skipping the debian packages installation"
    return 0
  fi
  # Update the apt cache and install the packages in non interactive mode
  logger "update_r_packages: Installing the debian dependencies"
  if [ -z "$(find /var/cache/apt/pkgcache.bin -mmin -360)" ]; then
    apt-get update -q >/dev/null 2>&1
  else
    logger "update_r_packages: APT cache not updated"
  fi
  : >"$apt_log"
  if [ -n "$PKGS_LIST" ] && [ -r "$PKGS_LIST" ]; then
    # DEBIAN_FRONTEND is set on the command itself so that apt-get sees it
    # even when the variable is not exported; -r avoids running apt-get for
    # an empty list.
    DEBIAN_FRONTEND=noninteractive xargs -r -a "$PKGS_LIST" \
      apt-get install -q -y "${dpkg_opts[@]}" >>"$apt_log" 2>&1
  else
    logger "update_r_packages: the debian packages list file is not readable"
  fi
  DEBIAN_FRONTEND=noninteractive apt-get autoremove -y "${dpkg_opts[@]}" >>"$apt_log" 2>&1
}
# Delete every 00LOCK-* entry found in the R site library; install.packages
# leaves such lock entries behind when the process crashes.
function remove_r_install_packages_lock_files() {
  local stale_lock
  for stale_lock in {{ r_sitelib_path }}/00LOCK-*; do
    rm -fr "$stale_lock"
  done
}
{% endif %}
{% if ansible_distribution_file_variety == "RedHat" %}
# Install the EL packages listed in $PKGS_LIST (one entry per line) with yum,
# unless EL_PKGS_SKIP is non-zero. An unset EL_PKGS_SKIP counts as 0, so the
# packages are installed instead of being skipped because of a failing
# numeric test. The yum output is appended to /var/log/update_r_el.log.
function distribution_pkgs() {
  local el_log=/var/log/update_r_el.log
  local -a el_pkgs
  if [ "${EL_PKGS_SKIP:-0}" -ne 0 ]; then
    logger "update_r_packages: skipping the EL packages installation"
    return 0
  fi
  if [ -z "$PKGS_LIST" ] || [ ! -r "$PKGS_LIST" ]; then
    logger "update_r_packages: the EL packages list is not available, skipping the EL packages installation"
    return 0
  fi
  logger "update_r_packages: Installing the EL dependencies"
  # Each line is split into words so that a line naming several packages
  # still installs all of them; the extra test keeps a last line that has no
  # trailing newline.
  while read -r -a el_pkgs || [ "${#el_pkgs[@]}" -gt 0 ]; do
    if [ "${#el_pkgs[@]}" -eq 0 ]; then
      continue
    fi
    # stdin comes from /dev/null so that yum cannot consume the rest of the list.
    yum -y install "${el_pkgs[@]}" </dev/null >>"$el_log" 2>&1
  done < "$PKGS_LIST"
}
# Delete every 00LOCK-* entry found in the R site library; install.packages
# leaves such lock entries behind when the process crashes.
function remove_r_install_packages_lock_files() {
  local stale_lock
  for stale_lock in {{ r_sitelib_path }}/00LOCK-*; do
    rm -fr "$stale_lock"
  done
}
{% endif %}
# Install the CRAN packages listed in $R_PKGS_LIST unless CRAN_PKGS_SKIP is
# set. Every whitespace separated entry has the form name[:mirror]; entries
# without a mirror use $R_CRAN_MIRROR.
# - ACTION=upgrade: the package is (re)installed, but only when
#   UPDATER_DEFAULT_BEHAVIOUR is "everything".
# - any other ACTION: the package is installed only when R does not report it
#   as installed.
# 'pkg' and 'mirror' are deliberately left global, exactly as before.
function r_cran_pkgs() {
  local entry
  if ! [ "$CRAN_PKGS_SKIP" -eq 0 ]; then
    logger "update_r_packages: skipping the R CRAN packages installation"
    return
  fi
  logger "update_r_packages: Installing R packages from CRAN"
  for entry in $( cat $R_PKGS_LIST ); do
    # Package name: everything before the first colon.
    pkg=${entry%%:*}
    case "$entry" in
      *:*)
        # Mirror: everything after the first colon.
        mirror=${entry#*:}
        ;;
      *)
        mirror=$R_CRAN_MIRROR
        ;;
    esac
    case "$ACTION" in
      upgrade)
        if [ "$UPDATER_DEFAULT_BEHAVIOUR" == "everything" ]; then
          Rscript $RSCRIPT_OPTIONS -e "install.packages(pkgs='$pkg', repos=c('$mirror/'));"
        fi
        ;;
      *)
        Rscript $RSCRIPT_OPTIONS -e "if (! ('$pkg' %in% installed.packages()[,'Package'])) { install.packages(pkgs='$pkg', repos=c('$mirror/')); }"
        ;;
    esac
  done
}
# Install pinned versions of CRAN packages with devtools::install_version,
# unless CRAN_VERSIONED_PKGS_SKIP is non-zero. Every whitespace separated
# entry of $R_VERSIONED_PKGS_LIST has the form name:version.
# - ACTION=upgrade: the pinned version is installed, but only when
#   UPDATER_DEFAULT_BEHAVIOUR is "everything".
# - any other ACTION: the pinned version is installed only when R does not
#   report the package as installed.
# Packages always come from $R_CRAN_MIRROR: this list format has no mirror
# field, and the 'mirror' variable set by r_cran_pkgs (the mirror of the last
# entry of another list, or nothing at all) is not used.
function r_cran_versioned_pkgs() {
  local entry pkg version install_cmd
  local -a entries=()
  if [ "${CRAN_VERSIONED_PKGS_SKIP:-0}" -ne 0 ]; then
    logger "update_r_packages: skipping the R CRAN versioned packages installation"
    return 0
  fi
  logger "update_r_packages: Installing versioned R packages from CRAN"
  if [ -z "$R_VERSIONED_PKGS_LIST" ] || [ ! -r "$R_VERSIONED_PKGS_LIST" ]; then
    logger "update_r_packages: the R CRAN versioned packages list file is not readable"
    return 0
  fi
  # Read all whitespace separated entries at once. With -d '' read reports
  # end-of-file as a failure although the array has been filled.
  read -r -d '' -a entries < "$R_VERSIONED_PKGS_LIST" || true
  for entry in "${entries[@]}"; do
    pkg=$(printf '%s\n' "$entry" | cut -d : -f 1)
    version=$(printf '%s\n' "$entry" | cut -d : -f 2)
    install_cmd="require(devtools); install_version('$pkg', '$version', repos=c('$R_CRAN_MIRROR/'));"
    # RSCRIPT_OPTIONS holds several options and is split on purpose.
    if [ "$ACTION" == "upgrade" ]; then
      if [ "$UPDATER_DEFAULT_BEHAVIOUR" == "everything" ]; then
        Rscript $RSCRIPT_OPTIONS -e "$install_cmd" </dev/null
      fi
    else
      Rscript $RSCRIPT_OPTIONS -e "if (! ('$pkg' %in% installed.packages()[,'Package'])) { $install_cmd }" </dev/null
    fi
  done
}
# Install R packages from GitHub with devtools::install_github, unless
# GITHUB_PKGS_SKIP is non-zero. Every whitespace separated entry of
# $R_PKGS_GITHUB has the form owner/package[:ref].
# - ACTION=upgrade: the package is always (re)installed.
# - any other ACTION: the package is installed only when R does not report it
#   as installed.
# The ref handed to R is the one given in the entry, or "master" when the
# entry has none.
# NOTE(review): the R code replaces that ref with the tag of the first entry
# returned by the GitHub releases API whenever the repository has releases,
# so an explicit ref only takes effect for repositories without releases -
# confirm that this precedence is intended.
function r_github_pkgs() {
  local entry pkg github_pkg github_pkg_version github_version_to_install install_cmd
  local -a entries=()
  if [ "${GITHUB_PKGS_SKIP:-0}" -ne 0 ]; then
    logger "update_r_packages: skipping the R GitHub packages installation"
    return 0
  fi
  logger "update_r_packages: Installing R packages from Github"
  if [ -z "$R_PKGS_GITHUB" ] || [ ! -r "$R_PKGS_GITHUB" ]; then
    logger "update_r_packages: the R GitHub packages list file is not readable"
    return 0
  fi
  # Read all whitespace separated entries at once. With -d '' read reports
  # end-of-file as a failure although the array has been filled.
  read -r -d '' -a entries < "$R_PKGS_GITHUB" || true
  for entry in "${entries[@]}"; do
    github_pkg=$(printf '%s\n' "$entry" | awk -F ":" '{ print $1 }')
    github_pkg_version=$(printf '%s\n' "$entry" | awk -F ":" '{ print $2 }')
    # The package name is the second path component of owner/package, taken
    # after the optional :ref suffix has been dropped, so that it can match
    # the names reported by installed.packages().
    pkg=$(printf '%s\n' "$github_pkg" | cut -d "/" -f 2)
    if [ -n "$github_pkg_version" ]; then
      github_version_to_install="$github_pkg_version"
    else
      github_version_to_install="master"
    fi
    install_cmd="require(devtools); require(methods); require(jsonlite) ; package_to_install <- '$github_pkg' ; refs <- jsonlite::read_json(sprintf('https://api.github.com/repos/%s/releases', package_to_install)) ; ref_to_install <- '$github_version_to_install'; if(length(refs)>0) { ref_to_install <- refs[[1]][['tag_name']] } ; devtools::install_github(package_to_install, ref = ref_to_install, upgrade='never')"
    # RSCRIPT_OPTIONS holds several options and is split on purpose.
    if [ "$ACTION" == "upgrade" ]; then
      Rscript $RSCRIPT_OPTIONS -e "$install_cmd" </dev/null
    else
      Rscript $RSCRIPT_OPTIONS -e "if (! ('$pkg' %in% installed.packages()[,'Package'])) { $install_cmd }" </dev/null
    fi
  done
}
#########
# Main
#
# Validate the command line, take the lock, obtain the package lists (git,
# subversion or plain HTTP) and run the installation steps in order.
usage
get_args
init_env
# "Run once" is detected through the presence of the repository working
# directory. The check is made only when such a directory is configured: with
# both variables empty the path collapses to "/", which always exists and
# would stop every run.
if [ "$INSTALLER_BEHAVIOUR" == "once" ] && [ -n "${R_PKGS_BASE_DIR:-}" ] && [ -n "${R_PKGS_DIR:-}" ]; then
  if [ -d "$R_PKGS_BASE_DIR/$R_PKGS_DIR" ]; then
    logger "update_r_packages: The script run once already. Doing nothing."
    cleanup
    exit 0
  fi
fi
if [ "${R_PKGS_FROM_GIT:-False}" == 'True' ]; then
  get_data_files_from_git
  # GIT_UPDATE_STATUS is 1 when the clone is new or the pull brought changes;
  # an unset status is treated the same way.
  if [ "${GIT_UPDATE_STATUS:-1}" -ne 1 ] && [ "$ACTION" == "install" ]; then
    logger "update_r_packages: nothing new to install from git"
    cleanup
    exit 0
  fi
elif [ "${R_PKGS_FROM_SVN:-False}" == 'True' ]; then
  get_data_files_from_svn
  # SVN_UPDATE_STATUS is 0 when the update brought changes; an unset status
  # (first checkout) is treated the same way.
  if [ "${SVN_UPDATE_STATUS:-0}" -ne 0 ] && [ "$ACTION" == "install" ]; then
    logger "update_r_packages: nothing new to install from SVN."
    cleanup
    exit 0
  fi
else
  get_data_files
fi
distribution_pkgs
remove_r_install_packages_lock_files
r_cran_pkgs
r_cran_versioned_pkgs
r_github_pkgs
cleanup
exit 0