#!/bin/sh
#
############################################################################
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 2 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You may have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
# USA.
#
# An on-line copy of the GNU General Public License can be found
# http://www.fsf.org/copyleft/gpl.html.
############################################################################
#
# log_processlist.sh (C) 2011 Simon J Mudd
#
# This script maintains a number of servers statistics in /var/log/mysql_pl.
#
# It is intended to be run every minute from cron, and takes no parameters
# as indicated below:
#
# * * * * * /usr/local/bin/log_processlist.sh
#
# It must run as root and will refuse to run if you try as a different
# user.
#
# Access to the db instances is currently exclusively done by looking for
# and then connecting to instances using defaults files matching the
# the following pattern: /root/.my*.cnf.
#
# The script has been run successfully on MySQL versions 5.0, 5.1 and
# 5.5.
#
# Running this script minutely from cron gives you a minutely overview of
# what the server and instances are doing and can be invaluable for
# debugging problems. Output includes the following information:
# - ps output for the server
# - SHOW FULL PROCESSLIST
# - SHOW SLAVE STATUS ( if the instance is a slave )
# - SHOW ENGINE INNODB STATUS (if the instance uses InnoDB)
#
# Files are dropped into /var/log/mysql_pl/XXX where XXX is (Mon,Tue,Wed,
# ...). Each file is named according to the time of day and instance
# 'name', e.g. 15_30xx.
# names:
#
# Config file-> Suffix
# .my.cnf -> empty
# .my-xx.cnf -> xx
# .my-aa.cnf -> aa
#
# Exit codes:
# 0 - all is fine.
# 1 - not started by root, refusing to run.
# 2 - there is no mysql command line client to be found.
# 3 - the required instance is not running (cannot be found in the process list).
# 4 - SQL failure
#
# Suggestions for improvement are always welcome and should be directed
# to me at the above email.
[ -n "$DEBUG" ] && set -x
myname=$(basename $0)
lockfile=/tmp/$myname.lock
myuser=$(id -un)
myhome=$(getent passwd $myuser | cut -d: -f6)
myhostname=$(hostname -s)
# reference - get the day time in one go to avoid timing issues.
reference=$(date +%a_%H_%M) # Mon_11_40 for Monday at 11:40 (unless locale is broken)
day_name=$(echo $reference | cut -d_ -f1)
current_time=$(echo $reference | cut -d_ -f2-3)
# We will drop stuff in $logdir/$day_name, and name it $logdir/$day_name/$current_time$suffix (with no dot)
logdir=/var/log/mysql_pl
# for cleaning up old format files.
warning_time=15_00
msg_info () {
echo "$(date +'%b %d %H:%M:%S') $myhostname $myname[$$]: $*"
}
msg_verbose () {
[ -n "$verbose" ] && msg_info "$*"
}
# add the uptime info
show_system_info () {
local log_file_base=$1
local log_file=$log_file_base.unix
echo 'UPTIME:' > $log_file 2>&1
uptime >> $log_file 2>&1
echo 'UNIX_PROCESSES:' >> $log_file 2>&1
ps auwwwx >> $log_file 2>&1
echo 'DISK SPACE' >> $log_file 2>&1
df -Th >> $log_file 2>&1
echo 'MEMORY' >> $log_file 2>&1
free -m >> $log_file 2>&1
}
try_sql () {
local my_cnf=$1
shift
mysql --defaults-file=$my_cnf --silent --execute="$*" > /dev/null 2>&1
return $?
}
# Run a mysql command and APPEND output to logfile
do_sql () {
local logfile=$1
local my_cnf=$2
shift 2
echo "SQL: $*" >> $logfile 2>&1
mysql --defaults-file=$my_cnf --silent --execute="$*" >> $logfile 2>&1
local rc=$?
if [ "$rc" -ne 0 ]; then
echo "$myname: MySQL generates error $rc using defaults-file $my_cnf when executing: $*"
exit 4
fi
}
# We now get the processlist and also the replication delay
get_instance_info () {
local log_file_base=$1
local my_cnf=$2
# determine the suffix (which may be empty)
local suffix=$(echo "$my_cnf" | sed -e 's,^.*/\.my,,' -e 's/\.cnf$//' -e 's/^-//')
local log_file=${log_file_base}${suffix}
# Cleanup output file. We delete it and if NO SQL access works it will stay deleted
test -f $log_file && rm -f $log_file
# Look at the config file and if it's trying to talk to a socket
# see if the instance is up. If not don't bother to try to even
# connect to it. Note: we can't check for the full instance path as it might not match:
# The path MAY be relative or absolute and may not include
# the implicit $datadir prefix. Don't bother to try and
# be clever.
if grep -q socket= $my_cnf; then
config_socket=$(grep socket $my_cnf | sed -e "s/#.*//" -e 's/^ *socket *= *//' | tail -1)
# originally we tried to find a mysqld in the processlist
# which matches out $config_socket, but it is much more
# complicated than that. For example, we might be talking
# with $config_socket=/path/to/data/mysql.sock
# but the server runs with
# --datadir=/path/to/data --socket=mysql.sock
# and we will never find that out using grep.
#
# We try differently now:
# We attempt a connect, and if that works, all is fine.
if [ -n "$config_socket" ]; then
if ! mysql --defaults-file=$my_cnf -e 'SELECT 1' > /dev/null 2>&1; then
return
fi
fi
fi
# Fetch processlist - use normal output so it's easier to filter the command
# list from the shell when looking for long running queries.
if try_sql $my_cnf 'SHOW FULL PROCESSLIST'; then
msg_verbose "my_cnf: $my_cnf - Adding processlist info to $log_file"
do_sql $log_file $my_cnf 'SHOW FULL PROCESSLIST'
elif [ $current_time = $warning_time ]; then
# To avoid spamming the world we only complain at 3 pm (arbitrary time)
# and try to give a bit more info.
cat </dev/null)
if [ $? = 0 ]; then
if ps -p $other_pid >/dev/null; then
msg_verbose "Another copy of this script appears to be running already with pid $other_pid. Exiting."
exit 0
fi
fi
# if the $? fails it means that the file has gone, so it should be ok to continue.
fi
# Keep the sysadmins happy.
# - If no mysql binary can be found just exit.
# mysqld on MySQL Community and Enterprise RPMS are normally in /usr/sbin
# mysqld on RedHat system rpms are normally in /usr/libexec
# mysqld on InfoBright rpms are normally in /usr/local/infobright/bin
mysqld=
mysqld_dirs='/usr/sbin /usr/libexec /usr/local/infobright/bin'
for dir in $mysqld_dirs; do
test -x $dir/mysqld && mysqld=$dir/mysqld
done
if [ -z "$mysqld" ]; then
msg_verbose "Can not find mysqld in $mysqld_paths. Exiting"
exit 2
else
msg_verbose "Found mysqld: $mysqld"
fi
# - If no mysql binary is running assume that the server is down and don't
# bother to try and query it. This avoids cron errors which upset us all.
local count=$(ps -ef | grep $mysqld | grep -v grep | wc -l)
if [ $count = 0 ]; then
msg_verbose "No instances of mysqld have been found"
exit 3
fi
msg_verbose "Checked that we can see at least one instance of mysqld running"
cleanup_lockfile=1
echo $$ > $lockfile
msg_verbose "Checked that no other copy of this script is running"
}
# clean up the lockfile if asked to do this.
cleanup () {
local rc=$?
# This file is used to check for instance info.
if [ -n "$instance_file" ]; then
msg_verbose "Cleaning up instance_file: $instance_file"
rm -f $instance_file
fi
if [ "$cleanup_lockfile" = 1 ]; then
msg_verbose "Cleaning up lock file: $lockfile"
[ -e $lockfile ] && rm -f $lockfile
fi
exit $rc
}
create_target_dir () {
# check that the required target dir exists and if not, make it.
# also drop a sensible README in there.
local logdir=$1
local day_name=$2
if [ ! -d $logdir ]; then
msg_verbose "creating missing directory $logdir"
mkdir $logdir || exit 1
cat < $logdir/README
README
======
This directory contains the output of command $command run from
script $myname which SHOULD be running every minute. Files are created
in the subdirectories Mon, Tue, Wed, ... with the names according to
the time in format hh_mm and a possible suffix according to the
instance name.
The script reads the instance configuration from ~/.my.cnf or ~/.my-XXX.cnf
and generates output for each instance. ~/.my-dba.cnf files are ignored.
EOF
fi
# Create the directory for each day if needed.
if [ ! -d $logdir/$day_name ]; then
msg_verbose "Creating missing directory $logdir/$day_name"
mkdir $logdir/$day_name || exit 1
fi
}
# Sometimes we may have more than one .my-XXX file pointing to the same
# instance. This checks the instance by asking the @@hostname, @@datadir.
# if this has been seen before then we return 0, otherise 1. The instance
# information is stored in $instance_file which is cleaned up when the
# script exits.
seen_instance () {
local instance_file=$1
local my_cnf=$2
local rc=1
# Don't ask about this stupid CHAR(58). It's to avoid quotes inside try_sql
# char(58) is ':'
if try_sql $my_cnf 'SELECT CONCAT(@@hostname,CHAR(58),@@datadir)'; then
instance_info=$(mysql --defaults-file=$my_cnf -BNe 'SELECT CONCAT(@@hostname,CHAR(58),@@datadir)')
grep -q $instance_info $instance_file
rc=$?
# We have not seen this instance before so add it to the list.
if [ $rc != 0 ] ; then
msg_verbose "my_cnf: $my_cnf - NOT SEEN instance info $instance_info before"
echo $instance_info >> $instance_file
else
msg_verbose "my_cnf: $my_cnf - HAVE SEEN instance info: $instance_info before, so ignoring"
fi
else
msg_verbose "Unable to check for instance_info using defaults-file: $my_cnf"
fi
return $rc
}
verbose=
while getopts v flag; do
case $flag in
v) verbose=1;;
*) msg_info "Usage: $myname [-v]"
esac
done
shift $(($OPTIND - 1))
msg_verbose "Running in verbose mode"
# Just in case we're upgrading mysql or the client has disappeared check to see
# if it's available. If it disappears then exit silently.
which mysql >/dev/null 2>&1 || exit 0
trap cleanup 0 1 2 3 9 15
sanity_check
# Create any missing directories if needed.
create_target_dir $logdir $day_name
log_file_base=$logdir/$day_name/$current_time
old_log_file_base=$logdir/${day_name}_${current_time}
instance_file=$(mktemp -t $myname.XXXXXXXX)
# Get some more unix process info which is often handy.
# We now do this BEFORE getting the db output. If the db locks up we still have something to show.
show_system_info $log_file_base
# Iterate over all instances found (EXCEPT .my-dba.cnf) and generate the output.
for my_cnf in $(cd $myhome && ls -a | egrep '^\.my(|-[0-9a-z-]*)\.cnf$' | grep -v '^\.my-dba\.cnf$')
do
# the text LOG_PROCESSLIST: NO will stop us looking at this config file
if ! grep -iq 'LOG_PROCESSLIST:[[:space:]]*NO' $my_cnf; then
if ! seen_instance $instance_file $myhome/$my_cnf; then
get_instance_info $log_file_base $myhome/$my_cnf
fi
fi
done