1 2 3 4 5 6 7 8 |
# nginx需添加以下配置
server {
    listen 80;
    server_name localhost;
    location /upstream_status {
        check_status;
    }
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
#!/usr/bin/env python
# coding: utf8
"""Send DingTalk alerts when the nginx upstream check page reports down peers.

Intended to be run periodically (e.g. from cron).  Each run:

1. Fetches ``/upstream_status?format=csv&status=down`` from ``nginx_server``.
2. If nothing is down and the state file is non-empty, sends a "recovered"
   message and clears the state file.
3. Otherwise builds an alert listing, per upstream, the availability rate
   and the down servers, and sends it unless it is identical to the alert
   stored in the state file by the previous run.

The script runs on both Python 2 and Python 3.
"""
import io
import json

try:
    import urllib2
except ImportError:  # Python 3: urllib2 lives in urllib.request
    import urllib.request as urllib2

nginx_server = '127.0.0.1'
# Holds the text of the last alert that was sent; empty means "no open alert".
file_name = '/tmp/upstream_check_' + nginx_server
# Seconds to wait on any HTTP call, so an unresponsive peer cannot make
# successive cron runs pile up.
HTTP_TIMEOUT = 10


def SendToDingTalk(message):
    """POST *message* as a text message to the DingTalk robot webhook.

    Raises whatever ``urllib2.urlopen`` raises when the request fails.
    """
    url = 'https://oapi.dingtalk.com/robot/send?access_token=xxxxxx'
    headers = {
        "Content-Type": "application/json",
        "Charset": "UTF-8"
    }
    data = {
        "msgtype": "text",
        "text": {
            "content": message
        }
    }
    # json.dumps emits ASCII-only text by default; encode it so the request
    # body is bytes on Python 3 as well.
    body = json.dumps(data).encode('utf-8')
    request = urllib2.Request(url, data=body, headers=headers)
    response = urllib2.urlopen(request, timeout=HTTP_TIMEOUT)
    response.close()


def fetch_status_lines(query):
    """Return the decoded lines of ``/upstream_status?<query>``."""
    url = 'http://' + nginx_server + '/upstream_status?' + query
    response = urllib2.urlopen(url, timeout=HTTP_TIMEOUT)
    try:
        raw = response.read()
    finally:
        response.close()
    # Decode once at the I/O boundary; everything below works on text.
    return raw.decode('utf-8', 'replace').splitlines()


def parse_status_rows(lines):
    """Turn CSV status lines into ``(upstream, server, status)`` tuples.

    The columns used are index 1 (upstream name), 2 (server address) and
    3 (status).  Blank lines are skipped.

    Raises:
        ValueError: a non-blank line has fewer than four comma-separated
            fields.
    """
    rows = []
    for line in lines:
        text = line.strip()
        if not text:
            continue
        fields = text.split(',')
        if len(fields) < 4:
            raise ValueError('unexpected upstream status line: %r' % (text,))
        rows.append((fields[1], fields[2], fields[3]))
    return rows


def group_down_servers(down_rows):
    """Map each upstream name to the list of its down server addresses."""
    grouped = {}
    for upstream, server, _status in down_rows:
        grouped.setdefault(upstream, []).append(server)
    return grouped


def alive_rate(all_rows, upstream):
    """Return the percentage of *upstream*'s servers that are ``up``.

    The result is a string with two decimals.  ``'0.00'`` is returned when
    *upstream* does not appear in *all_rows* (possible because the down list
    and the full list come from two separate requests).
    """
    total = 0
    alive = 0
    for name, _server, status in all_rows:
        if name != upstream:
            continue
        total += 1
        if status == 'up':
            alive += 1
    if total == 0:
        return '0.00'
    return format(float(alive) / float(total) * 100, '.2f')


def format_servers(servers):
    """Render *servers* as ``['a', 'b']``.

    Built by hand so the text is the same on Python 2 (where ``str()`` of a
    list of unicode strings would add ``u`` prefixes) and Python 3.
    """
    return u'[' + u', '.join(u"'" + s + u"'" for s in servers) + u']'


def build_alert_message(down_rows, all_rows):
    """Build the alert text for the given down rows and full status rows."""
    grouped = group_down_servers(down_rows)
    parts = [u'Nginx后端检测异常(' + nginx_server + u'):\n']
    # Sorted so the same set of failures always yields the same text, which
    # the duplicate-alert comparison relies on.
    for upstream in sorted(grouped):
        parts.append(
            u'upstream => ' + upstream
            + u', 可用率 => ' + alive_rate(all_rows, upstream)
            + u'%, 异常节点 => ' + format_servers(grouped[upstream])
            + u'\n'
        )
    parts.append(
        u'详情访问:http://' + nginx_server + u'/upstream_status?status=down'
    )
    return u''.join(parts)


def read_state():
    """Return the previously stored alert text, or ``u''`` if unreadable."""
    try:
        with io.open(file_name, 'r', encoding='utf-8') as f:
            return f.read()
    except (IOError, OSError, UnicodeDecodeError):
        # Missing or unreadable state is treated as "no open alert".
        return u''


def write_state(content):
    """Overwrite the state file with *content* (``u''`` clears it)."""
    with io.open(file_name, 'w', encoding='utf-8') as f:
        f.write(content)


def main():
    """Run one check: send an alert or a recovery message if needed."""
    down_rows = parse_status_rows(
        fetch_status_lines('format=csv&status=down'))

    if not down_rows:
        # Nothing is down: announce recovery only if an alert was open.
        if read_state():
            SendToDingTalk(u'Nginx后端检测异常已恢复(' + nginx_server + u')')
        write_state(u'')
        return

    # The full listing is fetched once and reused for every upstream.
    all_rows = parse_status_rows(fetch_status_lines('format=csv'))
    message = build_alert_message(down_rows, all_rows)

    # Same alert as last run: stay quiet.
    if message == read_state():
        return

    # Send first and record afterwards, so that a failed send is retried on
    # the next run instead of being mistaken for an already-sent alert.
    SendToDingTalk(message)
    write_state(message)


if __name__ == '__main__':
    main()
1 2 |
#配置cron计划任务
* * * * * python nginx-upstream-check.py
告警消息示例:
1 2 3 4 |
Nginx后端检测异常(127.0.0.1):
upstream => test_upstream_1, 可用率 => 50.00%, 异常节点 => ['172.16.0.2:80']
upstream => test_upstream_2, 可用率 => 0.00%, 异常节点 => ['172.16.0.1:8080', '172.16.0.2:8080']
详情访问:http://127.0.0.1/upstream_status?status=down
恢复消息示例:
1 |
Nginx后端检测异常已恢复(127.0.0.1) |