lunes, 28 de abril de 2014

Test your ELB

ELB (Elastic Load Balancer) is a service provided by AWS that lets you easily include scalable load balancers in your architecture. But sometimes you run into issues where it is difficult to determine whether they are related to the backend instances or to the ELB itself. There are some alternatives in this situation: you can review your CloudWatch metrics to look for any related issue, or you can design a specific test to ensure your environment is working as expected.

In the second case, several tools can be used to perform an in-depth test (JMeter, for example). But if you want to run a simple test based on a URL, testELB.sh could be what you need.

testELB.sh will test a URL for a period of time (by default, ten minutes), making random queries across all the IPs associated with your ELB. Additionally, it will show you interesting information about:

  • Average response time
  • Maximum response time
  • Percentile 95 response time
  • Average payload size
  • Error request (including percent)
Here is an execution example:

testELB.sh output

Usage:

testELB.sh ELB [LOOP] [URL] [PORT] [PROTOCOL]

  ELB: DNS Name associated to ELB
  LOOP: Number of loops. Default value: 600 (ten minutes)
  URL: URL to check in ELB. Default value: /
  PORT: Port in ELB to check. Default value: 80
  PROTOCOL: Protocol to use. Default value: http

This shell script could be a good starting point to troubleshoot ELB issues. Feel free to use it!

#!/bin/bash
#
#
# Developed by: Javier Sianes - jsianes@gmail.com
#
# Shell script to test ELB request activity health
#
#
# Remove the script's temporary files. Installed as the EXIT trap handler.
#
# Globals read: TMP, TMP_ELB, TMP_CUR, TMP_OUT, TMP_PER (paths; any of them
#               may be unset or empty if the script stopped early).
# Every path is quoted so that names containing whitespace or glob characters
# are still tested and removed correctly.
function on_exit()
{
  local tmp_file
  for tmp_file in "${TMP}" "${TMP_ELB}" "${TMP_CUR}" "${TMP_OUT}" "${TMP_PER}"; do
    # Skip unset/empty entries and anything that is not a regular file.
    if [[ -n "${tmp_file}" && -f "${tmp_file}" ]]; then
      rm -f -- "${tmp_file}"
    fi
  done
}
# Print the parameter error and the command-line synopsis on standard output,
# then terminate the script with exit status 1.
function usage()
{
  # One printf call; each argument becomes one output line.
  printf '%s\n' \
    "Error, incorrect parameters." \
    "" \
    "Usage: $0 ELB [LOOP] [URL] [PORT] [PROTOCOL]" \
    "" \
    " ELB: DNS Name associated to ELB" \
    " LOOP: Number of loops. Default value: 600 (ten minutes)" \
    " URL: URL to check in ELB. Default value: /" \
    " PORT: Port in ELB to check. Default value: 80" \
    " PROTOCOL: Protocol to use. Default value: http" \
    ""
  exit 1
}
# Compute the 95th percentile of the request times recorded in ${TMP}.
#
# Globals read:    TMP  - file of result lines "STATUS:IP:PORT:SIZE:TIME".
# Globals written: PERCT   - the percentile fraction, "0.95" (used in reports).
#                  PERCENT - the percentile value, or 0 when there is no
#                            usable sample.
#
# Lines containing FAIL are ignored, as are lines with an empty time field.
# The value is picked with the nearest-rank method: the samples are sorted
# numerically and the one at position ceil(fraction * count) is returned
# exactly as it appears in the file.
# The previous implementation returned the sample at which the running *sum*
# of times exceeded 95% of the total time, so a single slow request was
# reported as the percentile even when every other request was fast.
function percentile()
{
  PERCT="0.95"
  PERCENT=$(
    awk -F : '!/FAIL/ && $5 != "" { print $5 }' "${TMP}" \
      | LC_ALL=C sort -n \
      | awk -v fraction="${PERCT}" '
          { sample[NR] = $0 }
          END {
            if (NR == 0) { print 0; exit }
            # Work in whole percents so the rank is computed with exact
            # integers instead of a rounded floating point product.
            pct = int(fraction * 100 + 0.5)
            rank = int((pct * NR + 99) / 100)
            if (rank < 1) rank = 1
            if (rank > NR) rank = NR
            print sample[rank]
          }'
  )
  if [[ -z "${PERCENT}" ]]; then PERCENT=0; fi
}
# Compute the average of the size field over every line of ${TMP}.
#
# Globals read:    TMP     - file of result lines "STATUS:IP:PORT:SIZE:TIME".
# Globals written: PAYLOAD - average of field 4 truncated to two decimals
#                            (e.g. "78.33"), or 0 when the file is missing,
#                            empty, or the average truncates to zero.
#
# All lines count towards the average, including FAIL lines.
# The file is processed in a single awk pass instead of spawning cut and bc
# once per line. Values below 1 keep their leading zero ("0.25", not ".25"),
# matching how the rest of the script formats numbers.
function payload()
{
  PAYLOAD=0
  if [[ -s "${TMP}" ]]; then
    PAYLOAD=$(awk -F : '
      { sum += $4 }
      END {
        if (NR == 0) { print 0; exit }
        # Truncate (not round) to hundredths, like bc does with scale=2.
        hundredths = int(sum * 100 / NR)
        if (hundredths == 0) { print 0; exit }
        printf "%d.%02d\n", int(hundredths / 100), hundredths % 100
      }' "${TMP}")
  fi
  if [[ -z "${PAYLOAD}" ]]; then PAYLOAD=0; fi
}
# Connection timeout, in seconds, handed to curl's --connect-timeout.
TIMEOUT="60"

# Scratch files used by the script:
#   TMP     - response headers of the current request; later reused as the
#             working set of result lines for the statistics functions
#   TMP_ELB - addresses returned by dig for the ELB name
#   TMP_CUR - curl --write-out statistics of the current request
#   TMP_OUT - accumulated result lines of every request
#   TMP_PER - scratch file for the percentile computation
# The variables start empty and the cleanup trap is armed first, so files that
# were already created are still removed if a later mktemp call fails.
TMP=""
TMP_ELB=""
TMP_CUR=""
TMP_OUT=""
TMP_PER=""
trap on_exit EXIT

# mktemp creates each file atomically with an unpredictable name; names built
# from $RANDOM in a shared /tmp can be guessed and pre-created by another user.
TMP=$(mktemp "${TMPDIR:-/tmp}/testELB.XXXXXXXXXX") \
  && TMP_ELB=$(mktemp "${TMPDIR:-/tmp}/testELB.XXXXXXXXXX") \
  && TMP_CUR=$(mktemp "${TMPDIR:-/tmp}/testELB.XXXXXXXXXX") \
  && TMP_OUT=$(mktemp "${TMPDIR:-/tmp}/testELB.XXXXXXXXXX") \
  && TMP_PER=$(mktemp "${TMPDIR:-/tmp}/testELB.XXXXXXXXXX") \
  || { echo "Error, unable to create temporary files." >&2; exit 1; }
# Positional arguments: ELB [LOOP] [URL] [PORT] [PROTOCOL].
# Between one and five arguments are accepted; anything else prints the usage
# text and exits.
if (( $# < 1 || $# > 5 )); then
  usage
fi

# A default applies only when the argument was not supplied at all.
ELB=$1
LOOP=${2-600}
URL=${3-/}
PORT=${4-80}
PROTOCOL=${5-http}

# LOOP is used as a loop bound and as a divisor in the statistics, so it must
# be a positive integer; zero or non-numeric input would otherwise run no test
# and produce bc errors (division by zero) in the report.
if ! [[ "${LOOP}" =~ ^[0-9]+$ ]] || (( 10#${LOOP} == 0 )); then
  usage
fi
# Force base 10 so a value with leading zeros is not treated as octal.
LOOP=$(( 10#${LOOP} ))
# ---------------------------------------------------------------------------
# Main run: resolve the ELB name, send LOOP requests (one per second) to
# randomly chosen ELB addresses, then print overall and per-address statistics.
#
# Every request appends one line "STATUS:IP:PORT:SIZE:TIME" to ${TMP_OUT},
# where STATUS is OK or FAIL. The percentile and payload functions read their
# input from ${TMP}.
# ---------------------------------------------------------------------------

# Evaluate a bc expression with two decimals and print the result, restoring
# the leading zero that bc omits for values below one (".50" -> "0.50").
function bc_scale2()
{
  local result
  result=$(echo "scale=2; $1" | bc)
  if [[ "${result}" == .* ]]; then result="0${result}"; fi
  printf '%s\n' "${result}"
}

# Print $1 as a percentage of $2. The multiplication comes first: dividing
# first at scale=2 truncates the ratio to two decimals, so small rates such as
# 5 of 600 would be reported as 0.
function percent_of()
{
  bc_scale2 "((${1}*100)/${2})"
}

# "dig +short" also prints intermediate CNAME targets when the name is an
# alias; keep only the lines that look like IPv4 addresses.
dig a +short "${ELB}" 2>/dev/null \
  | grep -E '^[0-9]{1,3}(\.[0-9]{1,3}){3}$' >"${TMP_ELB}"
NUM=0
ERROR=0
MAX=0
TOTAL=0
NUM_ELB=$(( $(wc -l <"${TMP_ELB}") ))
if [ "${NUM_ELB}" -gt 0 ]
then
  echo ""
  echo "------------------------------------------------------------------------------"
  echo "REQUEST TEST"
  echo "------------------------------------------------------------------------------"
  while [ "${NUM}" -lt "${LOOP}" ]
  do
    # Pick one of the resolved addresses at random (1-based line number).
    VALUE=$(( (RANDOM % NUM_ELB) + 1 ))
    ELB_IP=$(sed -n "${VALUE}p" "${TMP_ELB}")
    # HEAD request sent straight to the chosen address: headers go to ${TMP},
    # the --write-out statistics to ${TMP_CUR}.
    # NOTE(review): %{size_request} is the number of bytes curl sent in the
    # request, so the reported "payload" is the request size - confirm intent.
    # NOTE(review): the URL carries the IP, so the Host header is the IP and
    # not the ELB name - confirm this is wanted for name-based backends.
    if ! curl -s -k -I --connect-timeout "${TIMEOUT}" -o "${TMP}" -w "%{remote_ip}:%{remote_port}:%{size_request}:%{time_total}\n" "${PROTOCOL}://${ELB_IP}:${PORT}${URL}" >"${TMP_CUR}" 2>/dev/null
    then
      # curl itself failed (connection error, timeout, ...).
      (( ERROR = ERROR + 1 ))
      echo "FAIL:${ELB_IP}:${PORT}:0:0" | tee -a "${TMP_OUT}"
    else
      # Some locales print decimals with a comma; bc needs a dot.
      STATS=$(sed 's/,/./g' "${TMP_CUR}")
      # Look at the status line only. HTTP/2 responses carry no reason phrase
      # ("HTTP/2 200"), so searching for the text "200 OK" would flag them.
      if grep -Eqi '^HTTP/[0-9.]+ 200([^0-9]|$)' "${TMP}"
      then
        echo "OK:${STATS}" | tee -a "${TMP_OUT}"
      else
        (( ERROR = ERROR + 1 ))
        echo "FAIL:${STATS}" | tee -a "${TMP_OUT}"
      fi
      # Any answered request counts towards the timing figures.
      REQUEST_TIME=$(echo "${STATS}" | cut -d : -f 4)
      BOOL=$(echo "${REQUEST_TIME}>${MAX}" | bc)
      if [ "${BOOL}" -eq 1 ]; then MAX=${REQUEST_TIME}; fi
      TOTAL=$(echo "(${TOTAL}+${REQUEST_TIME})" | bc)
    fi
    sleep 1
    (( NUM = NUM + 1 ))
  done

  # Overall figures.
  P_ERROR=$(percent_of "${ERROR}" "${LOOP}")
  TOTAL_MINUTES=$(bc_scale2 "((${TOTAL}/60)/1)")
  if [[ "${MAX}" == .* ]]; then MAX="0${MAX}"; fi
  if [[ "${TOTAL}" == .* ]]; then TOTAL="0${TOTAL}"; fi
  AVERAGE=$(bc_scale2 "((${TOTAL}/${LOOP})/1)")
  # percentile and payload read their input from ${TMP}.
  cat "${TMP_OUT}" >"${TMP}"
  percentile
  payload
  echo ""
  echo "------------------------------------------------------------------------------"
  echo "ELB \"${ELB}\": FINAL RESULTS"
  echo "------------------------------------------------------------------------------"
  echo ""
  echo "Total requests: ${LOOP}"
  echo "Total requests time: ${TOTAL} seconds (${TOTAL_MINUTES} minutes)"
  echo "Maximum request time: ${MAX} seconds"
  echo "Average request time: ${AVERAGE} seconds"
  echo "Percentile (${PERCT}): ${PERCENT} seconds"
  echo "Average payload size: ${PAYLOAD} bytes"
  echo "Error rate: ${ERROR} of ${LOOP} (${P_ERROR}%)"
  echo "IPs in ELB: ${NUM_ELB}"

  # Per-address figures.
  NUM=1
  while [ "${NUM}" -le "${NUM_ELB}" ]
  do
    ELB_IP=$(sed -n "${NUM}p" "${TMP_ELB}")
    # Fixed-string match on ":IP:" so that 10.0.0.1 does not also select the
    # lines of 10.0.0.10, and the dots are not treated as regex wildcards.
    grep -F -- ":${ELB_IP}:" "${TMP_OUT}" >"${TMP}"
    NUM_REQUEST=$(( $(wc -l <"${TMP}") ))
    if [ "${NUM_REQUEST}" -gt 0 ]
    then
      P_NREQUEST=$(percent_of "${NUM_REQUEST}" "${LOOP}")
      ERROR_REQUEST=$(grep -c FAIL "${TMP}")
      # Errors relative to this address' requests, and to all requests.
      P_ERROR_REQUEST=$(percent_of "${ERROR_REQUEST}" "${NUM_REQUEST}")
      P_TOTAL_ERROR_REQUEST=$(percent_of "${ERROR_REQUEST}" "${LOOP}")
      MAX=0
      TOTAL=0
      while IFS=: read -r _status _ip _port _size REQUEST_TIME _rest
      do
        BOOL=$(echo "${REQUEST_TIME}>${MAX}" | bc)
        if [ "${BOOL}" -eq 1 ]; then MAX=${REQUEST_TIME}; fi
        TOTAL=$(echo "(${TOTAL}+${REQUEST_TIME})" | bc)
      done <"${TMP}"
      # The average only needs the final total, so compute it once.
      AVERAGE=$(bc_scale2 "((${TOTAL}/${NUM_REQUEST})/1)")
      percentile
      payload
    else
      P_NREQUEST=0
      MAX=0
      AVERAGE=0
      PERCENT=0
      ERROR_REQUEST=0
      P_ERROR_REQUEST=0
      P_TOTAL_ERROR_REQUEST=0
      PAYLOAD=0
    fi
    echo " IP: ${ELB_IP}"
    echo " Processed requests: ${NUM_REQUEST} of ${LOOP} (${P_NREQUEST}%)"
    echo " Maximum request time: ${MAX} seconds"
    echo " Average request time: ${AVERAGE} seconds"
    echo " Percentile (${PERCT}): ${PERCENT} seconds"
    echo " Average payload size: ${PAYLOAD} bytes"
    echo " Error request: ${ERROR_REQUEST} of ${NUM_REQUEST} (${P_ERROR_REQUEST}% / ${P_TOTAL_ERROR_REQUEST}%)"
    echo ""
    (( NUM = NUM + 1 ))
  done
else
  # Nothing to test: report it instead of ending silently with status 0.
  echo "Error, no IP addresses found for ELB \"${ELB}\"." >&2
  exit 1
fi
view raw testELB.sh hosted with ❤ by GitHub