#!/bin/bash
#########################################################################
# Script: check_mysql_slavestatus.sh #
# Author: Claudio Kuenzler www.claudiokuenzler.com #
# Purpose: Monitor MySQL Replication status with Nagios #
# Description: Connects to given MySQL hosts and checks for running #
# SLAVE state and delivers additional info #
# Original: This script is a modified version of #
# check mysql slave sql running written by dhirajt #
# Thanks to: Victor Balada Diaz for his ideas added on 20080930 #
# Soren Klintrup for stuff added on 20081015 #
# Marc Feret for Slave_IO_Running check 20111227 #
# Peter Lecki for his mods added on 20120803 #
# Serge Victor for his mods added on 20131223 #
# Omri Bahumi for his fix added on 20131230 #
# Marc Falzon for his option mods added on 20190822 #
# Andreas Pfeiffer for adding socket option on 20190822 #
# History: #
# 2008041700 Original Script modified #
# 2008041701 Added additional info if status OK #
# 2008041702 Added usage of script with params -H -u -p #
# 2008041703 Added bindir variable for multiple platforms #
# 2008041704 Added help because mankind needs help #
# 2008093000 Using /bin/sh instead of /bin/bash #
# 2008093001 Added port for MySQL server #
# 2008093002 Added mysqldir if mysql binary is elsewhere #
# 2008101501 Changed bindir/mysqldir to use PATH #
# 2008101501 Use $() instead of `` to avoid forks #
# 2008101501 Use ${} for variables to prevent problems #
# 2008101501 Check if required commands exist #
# 2008101501 Check if mysql connection works #
# 2008101501 Exit with unknown status at script end #
# 2008101501 Also display help if no option is given #
# 2008101501 Add warning/critical check to delay #
# 2011062200 Add perfdata #
# 2011122700 Checking Slave_IO_Running #
# 2012080300 Changed to use only one mysql query #
# 2012080301 Added warn and crit delay as optional args #
# 2012080302 Added standard -h option for syntax help #
# 2012080303 Added check for mandatory options passed in #
# 2012080304 Added error output from mysql #
# 2012080305 Changed from 'cut' to 'awk' (eliminate ws) #
# 2012111600 Do not show password in error output #
# 2013042800 Changed PATH to use existing PATH, too #
# 2013050800 Bugfix in PATH export #
# 2013092700 Bugfix in PATH export #
# 2013092701 Bugfix in getopts #
# 2013101600 Rewrite of threshold logic and handling #
# 2013101601 Optical clean up #
# 2013101602 Rewrite help output #
# 2013101700 Handle Slave IO in 'Connecting' state #
# 2013101701 Minor changes in output, handling UNKNOWN situations now #
# 2013101702 Exit CRITICAL when Slave IO in Connecting state #
# 2013123000 Slave_SQL_Running also matched Slave_SQL_Running_State #
# 2015011600 Added 'moving' check to catch possible connection issues #
# 2015011900 Use its own threshold for replication moving check #
# 2019082200 Add support for mysql option file #
# 2019082201 Improve password security (remove from mysql cli) #
# 2019082202 Added socket parameter (-S) #
# 2019082203 Use default port 3306, makes -P optional #
# 2019082204 Fix moving subcheck, improve documentation #
#########################################################################
# Usage: ./check_mysql_slavestatus.sh (-o file|(-H dbhost [-P port]|-S socket) -u dbuser -p dbpass) [-s connection] [-w integer] [-c integer] [-m integer]
#########################################################################
# Help text shown for -h, --help, or when the script is called without
# arguments. It contains literal "\n" and "\t" escape sequences, so it has to
# be printed with `echo -e`. $0 is expanded here, at assignment time.
help="\ncheck_mysql_slavestatus.sh (c) 2008-2019 GNU GPLv2 licence
Usage: $0 (-o file|(-H dbhost [-P port]|-S socket) -u username -p password) [-s connection] [-w integer] [-c integer] [-m integer]\n
Options:\n-o Path to option file containing connection settings (e.g. /home/nagios/.my.cnf). Note: If this option is used, -H, -u, -p parameters will become optional\n-H Hostname or IP of slave server\n-P MySQL Port of slave server (optional, defaults to 3306)\n-u Username of DB-user\n-p Password of DB-user\n-S database socket\n-s Connection name (optional, with multi-source replication)\n-w Replication delay in seconds for Warning status (optional)\n-c Replication delay in seconds for Critical status (optional)\n-m Threshold in seconds since when replication did not move (compares the slaves log position)\n
Attention: The DB-user you type in must have REPLICATION CLIENT rights on the DB-server. Example:\n\tGRANT REPLICATION CLIENT on *.* TO 'nagios'@'%' IDENTIFIED BY 'secret';"

# Nagios plugin exit codes. They are never reassigned, hence readonly.
readonly STATE_OK=0       # replication is healthy
readonly STATE_WARNING=1  # delay reached the warning threshold, or replication did not move
readonly STATE_CRITICAL=2 # a replication thread is down, or delay reached the critical threshold
readonly STATE_UNKNOWN=3  # missing/invalid parameters or an unexpected replication state

# Keep the caller's PATH and append the usual binary locations.
export PATH="${PATH}:/usr/local/bin:/usr/bin:/bin"

crit="No"       # value of Slave_SQL_Running / Slave_IO_Running that means CRITICAL
ok="Yes"        # value of Slave_SQL_Running / Slave_IO_Running that means OK
port="-P 3306"  # default mysql port option; replaced when -P is given

# Make sure every command this script relies on can be resolved before doing
# any real work. `command -v` is a shell builtin that reports builtins and
# PATH entries alike; unlike the previous backtick construct, nothing printed
# by the lookup is ever executed as a command.
for cmd in mysql awk grep expr "["; do
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "UNKNOWN: This script requires the command '${cmd}' but it does not exist; please check if command exists and PATH is correct"
    exit "${STATE_UNKNOWN}"
  fi
done

# Check for people who need help
#########################################################################
# Print the help text when the first argument is --help or when no
# arguments were given at all. The exit code 1 is kept from the original.
if [[ "${1}" == "--help" || "${#}" -eq 0 ]]; then
  echo -e "${help}"
  exit 1
fi

# Important given variables for the DB-Connect
#########################################################################
#######################################
# Parse the command line options into the global connection and threshold
# variables used by the rest of the script.
# Globals (written): host, slavetarget, port, user, password, MYSQL_PWD
#                    (exported), socket, connection, warn_delay, crit_delay,
#                    optfile, moving
# Globals (read):    help
# Arguments:         the script's command line ("$@")
# Outputs:           help text or an error message on stdout
# Exits:             1 on -h or on an unknown/incomplete option
#######################################
parse_options() {
  local Input
  # Local OPTIND/OPTARG so every call starts parsing at the first argument.
  local OPTIND=1 OPTARG

  while getopts "H:P:u:p:S:s:w:c:o:m:h" Input; do
    case "${Input}" in
      # host/port/user/socket keep the mysql flag in front of the value; the
      # mysql invocation relies on word splitting to turn them into two words.
      H) host="-h ${OPTARG}"; slavetarget="${OPTARG}" ;;
      P) port="-P ${OPTARG}" ;;
      u) user="-u ${OPTARG}" ;;
      # The password is handed to mysql through the environment, not argv.
      p) password="${OPTARG}"; export MYSQL_PWD="${OPTARG}" ;;
      S) socket="-S ${OPTARG}" ;;
      # Stored with surrounding double quotes; it is placed verbatim into the
      # "show slave ... status" statement.
      s) connection="\"${OPTARG}\"" ;;
      w) warn_delay="${OPTARG}" ;;
      c) crit_delay="${OPTARG}" ;;
      o) optfile="--defaults-extra-file=${OPTARG}" ;;
      m) moving="${OPTARG}" ;;
      h) echo -e "${help}"; exit 1 ;;
      \?)
        echo "Wrong option given. Check help (-h, --help) for usage."
        exit 1
        ;;
    esac
  done
}

parse_options "$@"

# Check if we can write to tmp
#########################################################################
# The state file stores the last seen execution position for the "moving"
# check. Its name is deterministic on purpose: a later run has to find the
# file written by an earlier one. When /tmp is not writable, tmpfile stays
# unset and the moving check is skipped.
# NOTE(review): without -H (socket or option-file mode) slavetarget is empty,
# so all such checks share /tmp/mysql_slave__pos.txt.
if [[ -w /tmp ]]; then
  tmpfile="/tmp/mysql_slave_${slavetarget}_pos.txt"
fi

# Connect to the DB server and check for informations
#########################################################################
# Check whether all required arguments were passed in (either option file or
# full connection settings).
# With an option file (-o) the connection settings may all come from that
# file, so -H, -u and -p are optional, as the help text states. Without one,
# a target (-H or -S) plus user and password are mandatory.
if [[ -z "${optfile}" ]]; then
  if [[ -z "${host}" && -z "${socket}" ]]; then
    echo -e "Missing required parameter(s)"
    exit "${STATE_UNKNOWN}"
  elif [[ -z "${user}" || -z "${password}" ]]; then
    echo -e "Missing required parameter(s)"
    exit "${STATE_UNKNOWN}"
  fi
fi

# Connect to the DB server and store output in vars
# optfile, socket, host, port and user each hold "flag value" text (or are
# empty), so they are left unquoted on purpose: word splitting turns them
# into separate mysql arguments. A socket connection takes precedence over
# host/port. stderr is captured together with stdout.
# shellcheck disable=SC2086
if [[ -n "${socket}" ]]; then
  ConnectionResult=$(mysql ${optfile} ${socket} ${user} -e "show slave ${connection} status\G" 2>&1)
else
  ConnectionResult=$(mysql ${optfile} ${host} ${port} ${user} -e "show slave ${connection} status\G" 2>&1)
fi

# Output without a Slave_IO_State field is treated as a failed connection.
if ! grep -q "Slave_IO_State" <<<"${ConnectionResult}"; then
  echo -e "CRITICAL: Unable to connect to server"
  exit "${STATE_CRITICAL}"
fi
#######################################
# Print the value of one field of the "show slave status\G" output.
# The field name has to match the first word of a line exactly, so for
# example Slave_SQL_Running does not match Slave_SQL_Running_State, and
# text inside an error message cannot produce a second match.
# Globals:   ConnectionResult (read)
# Arguments: $1 - field name without the trailing colon
# Outputs:   second whitespace-separated word of the first matching line
#######################################
slave_status_field() {
  printf '%s\n' "${ConnectionResult}" \
    | awk -v key="${1}:" '$1 == key { print $2; exit }'
}

check=$(slave_status_field Slave_SQL_Running)
checkio=$(slave_status_field Slave_IO_Running)
masterinfo=$(slave_status_field Master_Host)
delayinfo=$(slave_status_field Seconds_Behind_Master)
readpos=$(slave_status_field Read_Master_Log_Pos)
execpos=$(slave_status_field Exec_Master_Log_Pos)

# Output of different exit states
#########################################################################
#######################################
# Exit CRITICAL when one of the replication threads is not running.
# Empty values (field missing from the server output) match none of the
# conditions and fall through to the caller instead of raising test errors.
# Globals (read): check, checkio, crit, host, port, STATE_CRITICAL
# Outputs:        one CRITICAL status line on stdout when a condition matches
# Exits:          STATE_CRITICAL when a condition matches; returns otherwise
# NOTE(review): host and port still carry their mysql flags ("-h ..." and
# "-P ..."), so these appear verbatim in the messages below.
#######################################
check_thread_states() {
  if [[ "${check}" == "NULL" ]]; then
    echo "CRITICAL: Slave_SQL_Running is answering NULL"
    exit "${STATE_CRITICAL}"
  fi

  if [[ -n "${check}" && "${check}" == "${crit}" ]]; then
    echo "CRITICAL: ${host}:${port} Slave_SQL_Running: ${check}"
    exit "${STATE_CRITICAL}"
  fi

  if [[ -n "${checkio}" && "${checkio}" == "${crit}" ]]; then
    echo "CRITICAL: ${host} Slave_IO_Running: ${checkio}"
    exit "${STATE_CRITICAL}"
  fi

  # An IO thread in "Connecting" state is reported as CRITICAL as well.
  if [[ "${checkio}" == "Connecting" ]]; then
    echo "CRITICAL: ${host} Slave_IO_Running: ${checkio}"
    exit "${STATE_CRITICAL}"
  fi
}

check_thread_states

# Print the OK status line (with perfdata) and terminate with STATE_OK.
report_ok() {
  echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"
  exit "${STATE_OK}"
}

# Succeed when $1 consists of digits only and is greater than zero.
# 10# forces base 10 so values with leading zeros are not read as octal.
is_positive_int() {
  [[ "${1}" =~ ^[0-9]+$ ]] && (( 10#${1} > 0 ))
}

#######################################
# "Moving" sub-check: detect a replication whose execution position has not
# changed for more than ${moving} seconds. The position is remembered in
# ${tmpfile} between runs; the file's modification time is the time of the
# first observation.
# Globals (read): tmpfile, moving, execpos, STATE_WARNING
# Exits:          STATE_WARNING when the position is unchanged for too long,
#                 STATE_OK otherwise. Never returns.
# NOTE(review): `date +%s -r FILE` is the GNU form for reading a file's mtime.
#######################################
check_replication_moving() {
  local now mtime saved_pos

  if [[ ! -w "${tmpfile}" ]]; then
    # First observation: remember the current position.
    echo "${execpos}" > "${tmpfile}"
    report_ok
  fi

  now=$(date +%s)
  mtime=$(date +%s -r "${tmpfile}")
  if ! [[ $(( now - mtime )) -gt "${moving}" ]]; then
    # The remembered position is not old enough to draw a conclusion.
    report_ok
  fi

  saved_pos=$(<"${tmpfile}")
  if [[ "${saved_pos}" -eq "${execpos}" ]]; then
    # Same position in the state file and on the server: nothing moved.
    echo "WARNING: Slave replication has not moved in ${moving} seconds. Manual check required."
    exit "${STATE_WARNING}"
  fi

  # The position changed since the state file was written; start over.
  rm -- "${tmpfile}"
  report_ok
}

#######################################
# Evaluate a slave whose SQL and IO threads are both running: apply the
# optional delay thresholds and the optional moving check, then exit with
# the matching Nagios state.
# Globals (read): warn_delay, crit_delay, delayinfo, moving, tmpfile,
#                 readpos, execpos, STATE_*
# Outputs:        one status line on stdout
# Exits:          always (OK, WARNING, CRITICAL or UNKNOWN)
# NOTE(review): the moving check only runs when both delay thresholds are set.
#######################################
evaluate_healthy_slave() {
  # Without delay thresholds
  if [[ -z "${warn_delay}" && -z "${crit_delay}" ]]; then
    report_ok
  fi

  # Exactly one threshold given: refuse instead of silently ignoring it.
  if [[ -z "${warn_delay}" || -z "${crit_delay}" ]]; then
    echo "Both warning and critical thresholds must be set"
    exit "${STATE_UNKNOWN}"
  fi
  if ! is_positive_int "${warn_delay}"; then
    echo "Warning threshold must be a valid integer greater than 0"
    exit "${STATE_UNKNOWN}"
  fi
  if ! is_positive_int "${crit_delay}"; then
    echo "Critical threshold must be a valid integer greater than 0"
    exit "${STATE_UNKNOWN}"
  fi
  # Normalise to plain decimal so later comparisons never see octal.
  warn_delay=$(( 10#${warn_delay} ))
  crit_delay=$(( 10#${crit_delay} ))
  if (( warn_delay > crit_delay )); then
    echo "Warning threshold cannot be greater than critical"
    exit "${STATE_UNKNOWN}"
  fi

  if [[ "${delayinfo}" -ge "${crit_delay}" ]]; then
    echo "CRITICAL: Slave is ${delayinfo} seconds behind Master | delay=${delayinfo}s"
    exit "${STATE_CRITICAL}"
  elif [[ "${delayinfo}" -ge "${warn_delay}" ]]; then
    echo "WARNING: Slave is ${delayinfo} seconds behind Master | delay=${delayinfo}s"
    exit "${STATE_WARNING}"
  fi

  # Everything looks OK here but now let us check if the replication is
  # moving. This only applies when read and execution position are equal.
  if [[ -n "${moving}" && -n "${tmpfile}" && "${readpos}" -eq "${execpos}" ]]; then
    check_replication_moving
  fi

  # Everything OK (no additional moving check)
  report_ok
}

# Only a slave with both threads reporting the OK value is evaluated here.
# Empty values must not count as a match; they fall through to the UNKNOWN
# handling at the end of the script.
if [[ -n "${check}" && "${check}" == "${ok}" && "${checkio}" == "${ok}" ]]; then
  evaluate_healthy_slave
fi

# Fallback: none of the checks above terminated the script, i.e. the thread
# states were neither a known critical value nor both equal to the OK value.
echo "UNKNOWN: should never reach this part (Slave_SQL_Running is ${check}, Slave_IO_Running is ${checkio})"
exit "${STATE_UNKNOWN}"