1、使用了Python的requests模块,是因为服务器本来就是Python2,这个模块已经安装,用起来也比较熟悉。json用来解析数据,把rabbitmq返回的json字符串解析成数组。time和datetime都是用来获取时间点的。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import requests, json, time, datetime
from requests.auth import HTTPBasicAuth
import sys
# Credentials sent as HTTP basic auth with every management API request
# (masked in this copy of the script).
username = '********'
password = '********'

# RabbitMQ management API endpoints queried by this script.
queue_url = 'http://10.3.22.19:15672/api/queues'
overview_url = 'http://10.3.22.19:15672/api/overview'
nodes_url = 'http://10.3.22.19:15672/api/nodes'

# Number of cluster nodes; starts at zero and is filled in once the nodes
# endpoint has been read.
nodes_num = 0

# Queues whose backlog is never reported by the per-queue check.
ignore_queue = [
    "Youku_OPFlowPkgSubsSync_delay_queue",
    "dlx_Youku_OPFlowPkgSubsSync_queue",
    "order_status_changed_notify_queue",
    "polling_order_status_queue_200056",
    "dlx_polling_order_status_queue",
    "polling_order_status_queue_200117",
]

# Prefix for every backlog alert line. The timestamp is taken once, when this
# statement runs, not when an alert is printed.
msg = "%s 消息队列堆积异常:" % time.strftime('%Y-%m-%d %H:%M:%S')

# Report line templates; each one has a single "{}" placeholder that is
# filled with str.format when the report is printed.
nodes_num_msg = "健康节点一共{}个,参考值4,如果小于4,需要尽快处理。"
queues_num_msg = "队列总数是{},参考数是217,这是绝对数,需要注意!"
consumers_num_msg = "消费者总数是{},参考数是14761,仅供参考。"
message_num_msg = "消息总数是{},没有参考数,订单多的时候就多,订单少的时候就少。"
messages_ready_msg = "待消费的消息数是{},没有参考数值,一般情况下,待消费消息应该和消息总数一样,因为程序设定消费完就确认删除。"
messages_unacknowledged_msg = "待确认的消息数量是{},没有参考值,如果太多,说明程序有消费,但是没确认的消息数。"
messages_details_msg = "总消息数的增速为每秒{},仅供判断,没有参考值。"
publish_details_msg = "消息发布率{},消息生产速度,时间单位秒,如果为零,需要注意订单受理情况。"
deliver_rate_msg = "邮件传递率{},消息消费速度,时间单位秒,如果为零,需要注意消费者和单列消费者工作状态。"
def _dig(data, *keys):
    """Follow *keys* through nested dicts; return None once a level is missing."""
    for key in keys:
        if not isinstance(data, dict):
            return None
        data = data.get(key)
    return data


# Query the cluster overview and the node list. Network errors and timeouts
# are deliberately left to propagate so a broken check is visible.
ove = requests.get(url=overview_url, auth=HTTPBasicAuth(username=username, password=password), timeout=3)
nodes = requests.get(url=nodes_url, auth=HTTPBasicAuth(username=username, password=password), timeout=3)

# Give every metric a value up front so the report can still be printed
# (showing None) when the overview call fails or a field is absent, instead
# of dying with NameError / AttributeError.
queues_num = consumers_num = messages_num = None
messages_ready = messages_unacknowledged = messages_details = None
deliver_rate = publish_details = None

if ove.status_code == 200:
    overview = json.loads(ove.text)
    queues_num = _dig(overview, "object_totals", "queues")
    consumers_num = _dig(overview, "object_totals", "consumers")
    messages_num = _dig(overview, "queue_totals", "messages")
    messages_ready = _dig(overview, "queue_totals", "messages_ready")
    messages_unacknowledged = _dig(overview, "queue_totals", "messages_unacknowledged")
    messages_details = _dig(overview, "queue_totals", "messages_details", "rate")
    # message_stats and its *_details objects can be missing (for example
    # when nothing has been published yet), hence the tolerant lookup.
    deliver_rate = _dig(overview, "message_stats", "deliver_details", "rate")
    publish_details = _dig(overview, "message_stats", "publish_details", "rate")

# The figure is reported as the number of healthy nodes, so only count
# entries the API marks as running. Entries without a "running" field are
# still counted, which keeps the previous result for such responses.
nodes_num = 0
if nodes.status_code == 200:
    all_nodes = json.loads(nodes.text)
    nodes_num = sum(1 for n in all_nodes if n.get("running", True))
def queue_num(threshold=10000):
    """Print one alert line for every queue whose backlog exceeds *threshold*.

    Fetches the queue list from ``queue_url`` and skips the names listed in
    ``ignore_queue``. For each remaining queue whose ``messages`` count is
    greater than *threshold* it prints ``msg``, the queue name and the count,
    separated by spaces. All offending queues are reported, not just the
    first one.

    Args:
        threshold: Backlog size above which a queue is reported. Defaults to
            10000, the value that used to be hard-coded.

    Returns:
        None. Results are written to standard output.
    """
    que = requests.get(url=queue_url, auth=HTTPBasicAuth(username=username, password=password), timeout=3)
    if que.status_code != 200:
        # Say so explicitly: an empty alert section must not be mistaken for
        # "no backlog" when the check itself could not run.
        print("队列接口请求失败,HTTP状态码%d,本次未能检查队列堆积情况。" % que.status_code)
        return

    for queue in json.loads(que.text):
        name = queue['name']
        if name in ignore_queue:
            continue
        # A missing or null "messages" field counts as an empty queue.
        queue_count = int(queue.get("messages") or 0)
        if queue_count > threshold:
            if not isinstance(name, str):
                # Python 2: json.loads yields unicode names. Encode them so
                # the line can be combined with the byte-string prefix and
                # printed even when stdout is a pipe with no encoding.
                name = name.encode('utf-8')
            print("%s %s %d" % (msg, name, queue_count))
# Cluster-wide summary, one line per metric, in a fixed order. Each print
# takes a single expression, so the parenthesised form prints the same text
# on Python 2 as the statement form did.
print(nodes_num_msg.format(nodes_num))
print(queues_num_msg.format(queues_num))
print(consumers_num_msg.format(consumers_num))
print(message_num_msg.format(messages_num))
print(messages_ready_msg.format(messages_ready))
print(messages_unacknowledged_msg.format(messages_unacknowledged))
print(messages_details_msg.format(messages_details))
print(publish_details_msg.format(publish_details))
print(deliver_rate_msg.format(deliver_rate))

# Separator between the summary and the per-queue alerts, stamped with the
# current local time.
print("\n*以上是 %s rabbitmq的巡检邮件,下面是有队列记录,才是异常情况:" % time.strftime('%Y-%m-%d %H:%M:%S'))

# Per-queue backlog check; prints only for queues over the limit.
queue_num()