当前位置:网站首页>Service reliability guarantee -watchdog
Service reliability guarantee -watchdog
2022-07-28 06:02:00 【King_ Kwin】
Background: if a service goes down in the production environment, production is affected. So I wrote a simple service-guard script that relies on Linux crontab to keep a watchdog running; this also ensures that the service is brought back up after the server restarts.
Configuration file (conf.ini):
[SYS]
# The maximum number of failures
max_fail_size=3
# Monitoring cycle
sleep_time=20
# Maximum size of a single log file, in bytes (about 10 MB)
log_size=10000000
[SERVICE]
# Main process of the monitored service (searched for in the `ps -ef` output)
main_process=/**/**/**/**/**.jar
# Port number monitored
port=9012
# Service start command
restart_command=bash /**/**/**/**/bin/start.sh restart
[END]
Watchdog script (note: a script executed by crontab does not get the login shell's environment variables, so they have to be loaded manually; see lines 2 and 3 of the script):
#!/bin/bash
. /etc/profile
. ~/.bash_profile
CURRENT_PATH=`cd \`dirname $0\`; pwd`
WATCHDOG_FILE=$CURRENT_PATH"/watchdog.sh"
CONF_FILE=$CURRENT_PATH"/conf.ini"
LOCK_FILE=$CURRENT_PATH"/watch.lock"
LOG_PATH=$CURRENT_PATH"/log"
mkdir $LOG_PATH
LOG_FILE=$LOG_PATH"/watch.log"
MAX_SIZE=10000000
# Print the one-line usage summary on stdout.
help() {
  printf '%s\n' 'help: watchdog.sh start|exist'
}
#######################################
# Rotate the watchdog log: LOG_FILE becomes LOG_FILE.1, LOG_FILE.1 becomes
# LOG_FILE.2 and so on; LOG_FILE.9 (the oldest archive) is deleted.
# Only LOG_FILE and its numbered archives are touched; any other file that
# lives in the log directory is left alone.
# Globals:   LOG_FILE (read)
# Arguments: none
# Outputs:   appends one trace line per processed file to LOG_FILE
#######################################
archiveLog() {
  local -r max_archives=9
  local index src stamp seen=1

  # Walk from the oldest archive down to the live log (index 0) so that no
  # file is overwritten before it has been moved out of the way.
  for ((index = max_archives; index >= 0; index--)); do
    if ((index == 0)); then
      src=$LOG_FILE
    else
      src="$LOG_FILE.$index"
    fi
    [ -f "$src" ] || continue

    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$stamp The first $seen Behavior ${src##*/}" >> "$LOG_FILE"

    if ((index >= max_archives)); then
      # The oldest archive falls off the end of the rotation.
      rm -f -- "$src"
    else
      mv -f -- "$src" "$LOG_FILE.$((index + 1))"
    fi
    seen=$((seen + 1))
  done
}
#######################################
# Append a timestamped message to the watchdog log, rotating the log first
# when it has reached the configured size limit.
# Globals:   LOG_FILE (read), MAX_SIZE (read; limit in bytes)
# Arguments: $1 - message text
# Outputs:   appends "<date> <message>" to LOG_FILE
#######################################
logInfo() {
  local limit="$MAX_SIZE" size stamp

  # A missing or non-numeric limit would make the size comparison fail and
  # silently disable rotation, so fall back to 10000000 bytes.
  case $limit in
    '' | *[!0-9]*) limit=10000000 ;;
  esac

  if [ -f "$LOG_FILE" ]; then
    size=$(wc -c < "$LOG_FILE")
    # Some wc implementations pad the count with blanks.
    size=${size//[!0-9]/}
    if [ "${size:-0}" -ge "$limit" ]; then
      archiveLog
    fi
  fi

  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "$stamp $1" >> "$LOG_FILE"
}
#######################################
# Look up one key inside one [section] of the INI-style config file.
# The key must match exactly, the lookup stops at the next section header,
# lines starting with '#' are ignored, and the value is everything after
# the first '=' (so values may themselves contain '=').
# Globals:   CONF_FILE (read), LOG_FILE (read), TMP_RET (written)
# Arguments: $1 - section name without brackets, e.g. SERVICE
#            $2 - key name, e.g. port
# Outputs:   trace lines appended to LOG_FILE; the value (empty when the
#            key is not found) is stored in TMP_RET
#######################################
getParam() {
  local paraType=$1 paraKey=$2 stamp

  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "$stamp paraType:$paraType" >> "$LOG_FILE"
  echo "$stamp paraKey:$paraKey" >> "$LOG_FILE"

  # Section and key are handed to awk as variables instead of being pasted
  # into the program text, so no eval is needed.
  TMP_RET=$(awk -v section="$paraType" -v key="$paraKey" '
    /^[ \t]*#/ { next }
    /^[ \t]*\[/ {
      name = $0
      sub(/^[ \t]*\[/, "", name)
      sub(/\].*$/, "", name)
      in_section = (name == section)
      next
    }
    in_section {
      pos = index($0, "=")
      if (pos == 0) next
      k = substr($0, 1, pos - 1)
      gsub(/^[ \t]+|[ \t\r]+$/, "", k)
      if (k == key) {
        v = substr($0, pos + 1)
        gsub(/^[ \t]+|[ \t\r]+$/, "", v)
        print v
        exit
      }
    }
  ' "$CONF_FILE")

  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo "$stamp resp is $TMP_RET" >> "$LOG_FILE"
}
# Print $1 when it is a non-negative integer, otherwise print the fallback $2.
_intOrDefault() {
  case $1 in
    '' | *[!0-9]*) printf '%s\n' "$2" ;;
    *) printf '%s\n' "$1" ;;
  esac
}

#######################################
# Load every watchdog setting from the config file through getParam.
# Numeric settings that are missing or malformed fall back to safe values
# (3 failures, 20 seconds, and the MAX_SIZE that was already in effect),
# because an empty value would break the integer tests and the sleep call
# that consume them.
# Globals written: SERVICE_MAIN, SERVICE_PORT, RESTART_COMMAND,
#                  MAX_FAIL_SIZE, SLEEP_TIME, MAX_SIZE
# Globals read:    TMP_RET (result slot filled by getParam), MAX_SIZE
# Arguments: none
#######################################
initParams() {
  ##### Service configuration #####
  # Main process of the monitored service
  getParam SERVICE main_process
  SERVICE_MAIN=$TMP_RET
  # Port the service is expected to listen on
  getParam SERVICE port
  SERVICE_PORT=$TMP_RET
  # Command used to (re)start the service
  getParam SERVICE restart_command
  RESTART_COMMAND=$TMP_RET

  ##### System configuration #####
  # Consecutive failed checks tolerated before a restart
  getParam SYS max_fail_size
  MAX_FAIL_SIZE=$(_intOrDefault "$TMP_RET" 3)
  # Seconds between two monitoring cycles
  getParam SYS sleep_time
  SLEEP_TIME=${TMP_RET:-20}
  # Size limit of a single log file, in bytes
  getParam SYS log_size
  MAX_SIZE=$(_intOrDefault "$TMP_RET" "${MAX_SIZE:-10000000}")
}
#######################################
# Report whether another watchdog instance is already running.
# The lock file is trusted only when the PID stored in it belongs to a live
# process whose `ps -ef` line also mentions WATCHDOG_FILE; a stale, empty or
# garbled lock file is ignored.
# When no other instance is found, the configuration is loaded through
# initParams before returning, so callers can rely on the settings being
# populated after a 0 return.
# Globals:   LOCK_FILE, WATCHDOG_FILE, LOG_FILE (read)
# Arguments: none
# Returns:   1 when a watchdog is already running, 0 otherwise
#######################################
exist() {
  local pid stamp

  if [ -e "$LOCK_FILE" ]; then
    while read -r pid _ || [ -n "$pid" ]; do
      # Skip blank or non-numeric entries instead of handing them to kill.
      case $pid in
        '' | *[!0-9]*) continue ;;
      esac
      # Signal 0 only probes for existence; a dead PID is not an error here.
      kill -0 "$pid" 2> /dev/null || continue

      stamp=$(date '+%Y-%m-%d %H:%M:%S')
      echo "$stamp lock file exist" >> "$LOG_FILE"

      # Compare the PID column exactly, so PID 123 never matches 1234.
      if ps -ef | awk -v pid="$pid" '$2 == pid' \
        | grep -qF -- "$WATCHDOG_FILE"; then
        stamp=$(date '+%Y-%m-%d %H:%M:%S')
        echo "$stamp watchdog already running" >> "$LOG_FILE"
        return 1
      fi
    done < "$LOCK_FILE"
  fi

  initParams
  logInfo 'watchdog stopped'
  return 0
}
# Run the configured restart command once. It runs in a subshell so that an
# `exit` or `cd` inside the command cannot affect the watchdog itself, and its
# output goes to the log file instead of being executed as a second command.
# NOTE(review): the command string comes from the config file and is passed
# to eval; keep that file writable by trusted users only.
_restartService() {
  (eval "$RESTART_COMMAND") < /dev/null >> "$LOG_FILE" 2>&1
}

#######################################
# Run the watchdog loop: every cycle check that the service process shows up
# in `ps -ef` and that its port is in LISTEN state, and run the restart
# command after MAX_FAIL_SIZE consecutive failures of either check.
# Globals:   SERVICE_MAIN, SERVICE_PORT, RESTART_COMMAND, MAX_FAIL_SIZE,
#            SLEEP_TIME, LOCK_FILE, LOG_FILE (read; the settings are expected
#            to be populated by the exist call)
# Arguments: none
# Returns:   does not return; exits 255 when another watchdog is running and
#            1 when a required setting is missing
#######################################
start() {
  local fail_size=0 fail_port_size=0
  local running listeners max_failures interval stamp

  if ! exist; then
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    echo "$stamp watchdog is running" >> "$LOG_FILE"
    # Same status the former `exit -1` produced. The lock is not removed here
    # because it belongs to the other instance.
    exit 255
  fi

  # Without these the checks below would either error out or match every
  # line, and the loop would restart blindly or never.
  if [ -z "$SERVICE_MAIN" ] || [ -z "$SERVICE_PORT" ] \
    || [ -z "$RESTART_COMMAND" ]; then
    logInfo 'main_process, port or restart_command is missing from the config'
    exit 1
  fi

  # Fall back to safe values so that a bad setting can neither break the
  # integer tests nor turn the loop into a busy spin.
  case $MAX_FAIL_SIZE in
    '' | *[!0-9]*) max_failures=3 ;;
    *) max_failures=$MAX_FAIL_SIZE ;;
  esac
  interval=${SLEEP_TIME:-20}

  # Remove the lock file on every exit path, including INT and TERM.
  trap 'rm -f -- "$LOCK_FILE"' EXIT
  trap 'exit 130' INT
  trap 'exit 143' TERM
  # Record the PID of this watchdog in the lock file.
  echo "$$" > "$LOCK_FILE"

  while true; do
    logInfo 'check progress'
    # -F: the configured path is a literal string, not a regular expression.
    running=$(ps -ef | grep -F -- "$SERVICE_MAIN" | grep -vc grep)
    if [ "$running" -eq 0 ]; then
      fail_size=$((fail_size + 1))
      logInfo "check progress fail size $fail_size"
      if [ "$fail_size" -ge "$max_failures" ]; then
        logInfo 'fail time reach max ,reload'
        _restartService
        fail_size=0
      fi
    else
      logInfo 'progress exist'
      fail_size=0
    fi

    # The port is only checked while the process check is not failing.
    if [ "$fail_size" -eq 0 ]; then
      logInfo 'check port'
      listeners=$(netstat -an | grep -F -- ":$SERVICE_PORT " \
        | awk '($1 == "tcp" || $1 == "tcp6") && $NF == "LISTEN"' | wc -l)
      if [ "$listeners" -eq 0 ]; then
        fail_port_size=$((fail_port_size + 1))
        logInfo "check port fail size $fail_port_size"
        if [ "$fail_port_size" -ge "$max_failures" ]; then
          logInfo 'fail time reach max ,reload'
          _restartService
          fail_port_size=0
        fi
      else
        logInfo 'port used'
        fail_port_size=0
      fi
    fi

    sleep "$interval"
  done
}
# Entry point: dispatch on the first command-line argument.
#   start - run the watchdog loop
#   exist - check whether a watchdog is already running
#   other - print the usage line
case "${1:-}" in
  start)
    start
    ;;
  exist)
    exist
    ;;
  *)
    help
    ;;
esac
# crontab configuration:
Run `crontab -e` to open the crontab editor.

Add a cron expression that runs the script every minute, for example: `* * * * * bash /path/to/watchdog.sh start`
边栏推荐
- Mars number * word * Tibet * product * Pingtai defender plan details announced
- CertPathValidatorException:validity check failed
- MySQL view, stored procedure and stored function
- ModuleNotFoundError: No module named ‘pip‘
- The difference between null and undefined
- 第九章 子查询(重点)
- ES6 new data types -- > symbol, map, set
- 1:开启慢查询日志 与 找到慢SQL
- 【一】redis简介
- 服务可靠性保障-watchdog
猜你喜欢

【二】redis基础命令与使用场景

记某淘宝客软件分析拿库思路

Community epidemic access management system based on PHP (PHP graduation design)

记录下在线扩容服务器遇到的问题 NOCHANGE: partition 1 is size 419428319. it cannot be grown

(php毕业设计)基于php学生日常行为管理系统获取

Mars数*字*藏*品*平*台守卫者计划细节公布

(php毕业设计)基于php学生作业提交管理系统获取

(php毕业设计)基于thinkphp5小区物业管理系统

ctfshow单身狗 -- web

南京邮电大学CTF题目writeup (二) 含题目地址
随机推荐
【四】redis持久化(RDB与AOF)
MySQL view, stored procedure and stored function
数字藏品成文旅产业新热点
raise RuntimeError(‘DataLoader worker (pid(s) {}) exited unexpectedly‘.format(pids_str))RuntimeErro
Briefly understand MVC and three-tier architecture
简单理解一下MVC和三层架构
Assembly打包
2:为什么要读写分离
Books - mental models
On how digital collections and entities can empower each other
CertPathValidatorException:validity check failed
Mysql的两种覆盖表中重复记录的方法
(php毕业设计)基于php甘肃旅游网站管理系统获取
raise RuntimeError(‘DataLoader worker (pid(s) {}) exited unexpectedly‘.format(pids_str))RuntimeErro
基于php心理健康服务系统获取(php毕业设计)
Chapter 7 single line function
Books - smart investors
Flex elastic box
Books - mob
regular expression