为了在某个服务告警的时候,更好地评估其影响面,想到了从skywalking中捞出服务之间的依赖关系,在告警推送的时候带上相关数据,这样做到心中有数。
话不多说,上代码。
TIPS:skywalking的数据实际上是存在es中的,下面step1 step2实际上就是查询ES中的几个索引。
# Example time window: 2025-02-24 14:49 to 2025-02-24 15:04
# (sent in the request body as "2025-02-24 1449" / "2025-02-24 1504" with step MINUTE).
# Lists services through the getAllServices GraphQL query; the empty "keyword"
# variable is passed as the group argument.
# \u0021 is the "!" character written as an escape inside the $'...' quoted string.
curl 'https://skywalking.demo.com/graphql' \
-H 'authority: skywalking.demo.com' \
-H 'accept: application/json, text/plain, */*' \
-H 'content-type: application/json;charset=UTF-8' \
-d $'{"query":"query queryServices($duration: Duration\u0021,$keyword: String\u0021) {\\n services: getAllServices(duration: $duration, group: $keyword) {\\n key: id\\n label: name\\n group\\n }\\n }","variables":{"duration":{"start":"2025-02-24 1449","end":"2025-02-24 1504","step":"MINUTE"},"keyword":""}}' \
--compressed
结果类似如下:
{
"data": {
"services": [
{
"key": "ZmFzdC1jb25maWctc2VydmljZQ==.1",
"label": "devops-abcd-service",
"group": "group01"
},
{
"key": "ZmFzdC1kYXRhLWFkbWlu.1",
"label": "devops-data-admin",
"group": "group01"
},
{
"key": "cGJtLWNsYWltLXNwZWNpYWwtbGluZS1zZXJ2aWNl.1",
"label": "devops-cicd-service",
"group": "group02"
},
{
"key": "cGJtLW91dGVyLXNlcnZpY2U=.1",
"label": "devops-id-service",
"group": "group03"
}
]
}
}
补充:step1这个结果可以定期持久化到数据库或者缓存里,降低对skywalking的查询压力。
这里以 devops-cicd-service 为例 ,获取对应时间段的拓扑关系。
入参1、devops-cicd-service 的serviceid,也就是 cGJtLWNsYWltLXNwZWNpYWwtbGluZS1zZXJ2aWNl.1
入参2、时间范围 2025-02-24 14:49 到 2025-02-24 15:04
# Fetch the topology of one service through the getServiceTopology GraphQL query.
# The "serviceId" variable carries the service id; this variant requests both
# the "nodes" list and the "calls" list for the given duration.
curl --location 'https://skywalking.demo.com/graphql' \
--header 'authority: skywalking.demo.com' \
--header 'accept: application/json, text/plain, */*' \
--header 'Content-Type: application/json' \
--data '{"query":"query queryServiceTopo($duration: Duration!, $serviceId: ID!) {\n topo: getServiceTopology(duration: $duration, serviceId: $serviceId) {\n nodes {\n id\n name\n type\n isReal\n }\n calls {\n id\n source\n detectPoints\n target\n }\n }}","variables":{"serviceId":"cGJtLWNsYWltLXNwZWNpYWwtbGluZS1zZXJ2aWNl.1","duration":{"start":"2025-02-24 1449","end":"2025-02-24 1504","step":"MINUTE"}}}'
上面的写法,是输出nodes和calls清单。 如果不要输出calls的调用链,只输出nodes段【推荐这种写法】,请求如下:
# Same getServiceTopology query, but requesting only the "nodes" list
# (no "calls"), for the window 2025-02-24 14:49 to 2025-02-24 15:04.
# No Cookie header is sent: the request that also fetches "calls" works
# without one, and a captured session cookie should not be published.
curl --location 'https://skywalking.demo.com/graphql' \
--header 'authority: skywalking.demo.com' \
--header 'accept: application/json, text/plain, */*' \
--header 'Content-Type: application/json' \
--data '{"query":"query queryServiceTopo($duration: Duration!, $serviceId: ID!) {\n topo: getServiceTopology(duration: $duration, serviceId: $serviceId) {\n nodes {\n id\n name\n type\n isReal\n }\n }}","variables":{"serviceId":"cGJtLWNsYWltLXNwZWNpYWwtbGluZS1zZXJ2aWNl.1","duration":{"start":"2025-02-24 1449","end":"2025-02-24 1504","step":"MINUTE"}}}'
只输出 nodes 的结果类似如下:
{
"data": {
"topo": {
"nodes": [
{
"id": "VXNlcg==.0",
"name": "User",
"type": "USER",
"isReal": false
},
{
"id": "dHBhLWFAAAAAAo4MA==.0",
"name": "api.demo.com:80",
"type": "HttpClient",
"isReal": false
},
{
"id": "bS10ay1jbiBBBBBBRrYyDM=.0",
"name": "api2.demo.cn:443",
"type": "HttpClient",
"isReal": false
},
{
"id": "MTkyLjE2OCCCCCCC6MzEwODE=.0",
"name": "192.168.11.123:31081",
"type": "OKHttp",
"isReal": false
},
{
"id": "ZmFzdC1pbnN1cmDDDDDRtaWQ==.1",
"name": "devops-dba-service",
"type": "SpringMVC",
"isReal": true
},
{
"id": "emlwa2luLm1lZEEEEEaCxMQ==.0",
"name": "zipkin.demo.com:9411",
"type": "SpringRestTemplate",
"isReal": false
},
{
"id": "ZmFzdC1DDDDDDDDDDS1zZXJ2aWNl.1",
"name": "devops-login-service",
"type": "Undertow",
"isReal": true
},
{
"id": "cGJtLW9DDDDDDDDNlcnZpY2U=.1",
"name": "devops-id-service",
"type": "Undertow",
"isReal": true
},
{
"id": "dGVzdC1mDDDDDDDDDDDDDDm1lZwNg==.0",
"name": "devops-mysql.demo.com:3306",
"type": "Mysql",
"isReal": false
},
{
"id": "c2luZ2xlLXJlZGlzDDDDDV06NjM3OA==.0",
"name": "devops-redis.demo.com:6379",
"type": "Redis",
"isReal": false
},
{
"id": "ZmFzdC1pbnN1DDDDDmF0Y2ljZQ==.1",
"name": "devops-batch-service",
"isReal": true
},
{
"id": "dGVzdC5ybDDDDDwaSOTg3Ng==.0",
"name": "devops-mq.demo.com:9876",
"type": "RabbitMQ",
"isReal": false
},
{
"id": "cGJtLWNsYWltLXNwZWNpYWwtbGluZS1zZXJ2aWNl.1",
"name": "devops-cicd-service",
"type": "Undertow",
"isReal": true
},
{
"id": "ZmFzdC1pbnN1DDDDDDDD2aWNl.1",
"name": "devops-platform-service",
"type": "Undertow",
"isReal": true
}
]
}
}
}
过滤后结果如下:
User
api.demo.com:80
api2.demo.cn:443
192.168.11.123:31081
devops-dba-service
zipkin.demo.com:9411
devops-login-service
devops-id-service
devops-mysql.demo.com:3306
devops-redis.demo.com:6379
devops-batch-service
devops-mq.demo.com:9876
devops-cicd-service
devops-platform-service
补充: step2的查询结果可以不用定期持久化,一般在触发告警的时候才会执行这个获取服务依赖的操作。当然如果是为了给机器学习做AIOps训练用的话另说。
这里存到了PG中,给时间列加了索引,小规模场景下估计不会存在性能问题。
也可以把最近一段时间的数据存一份到Redis中,降低对数据库的压力。
-- Install the ltree extension.
CREATE EXTENSION IF NOT EXISTS ltree;

-- Table that stores one service -> dependency path per row.
CREATE TABLE service_dependencies (
    id SERIAL PRIMARY KEY,
    create_time timestamp DEFAULT CURRENT_TIMESTAMP,
    service_path ltree
);

-- Queries filter on a time range, so index the timestamp column.
CREATE INDEX ON service_dependencies (create_time);

-- Insert some sample rows. '-' and ':' are replaced with '_' before loading
-- because they are not accepted in ltree labels here.
-- The redis/mysql rows use the devops_ prefix so that they match the
-- query output shown for this table.
INSERT INTO service_dependencies (service_path)
VALUES
    ('devops_abc_service.abc.demo.com_31081'),
    ('devops_abc_service.devops_cicd_service'),
    ('devops_abc_service.devops_dba_service'),
    ('devops_abc_service.devops_redis.demo.com_6378'),
    ('devops_abc_service.devops_mysql.demo.com_3306'),
    ('devops_abc_service.www.demo.com_443')
;
-- 查询指定时间段
postgres=# select distinct service_path from service_dependencies WHERE create_time BETWEEN '2025-02-25 13:23:16' AND NOW() AND service_path ~ 'devops_abc_service.*';
service_path
-----------------------------------------------
devops_abc_service.abc.demo.com_31081
devops_abc_service.devops_cicd_service
devops_abc_service.devops_dba_service
devops_abc_service.devops_redis.demo.com_6378
devops_abc_service.devops_mysql.demo.com_3306
devops_abc_service.www.demo.com_443
(6 rows)
这里可以使用python处理下数据。
# Strip the owning service's prefix so only the dependency names remain.
a = [
    'devops_abc_service.192.168.11.123_31081',
    'devops_abc_service.devops_cicd_service',
    'devops_abc_service.devops_dba_service',
]
prefix = 'devops_abc_service.'
# Only a leading prefix is removed; a dependency whose own name happens to
# contain the same text further along is left untouched.
b = [i[len(prefix):] if i.startswith(prefix) else i for i in a]
print(b)
# Result: ['192.168.11.123_31081', 'devops_cicd_service', 'devops_dba_service']
import networkx as nx
import matplotlib.pyplot as plt
# Service dependency data: "<service>.<dependency>" where the dependency part
# may itself contain dots (host names).
data = ['devops_abc_service.abc.demo.com_31081',
        'devops_abc_service.devops_cicd_service',
        'devops_abc_service.devops_dba_service',
        'devops_abc_service.devops_redis.demo.com_6378',
        'devops_abc_service.devops_mysql.demo.com_3306',
        'devops_abc_service.www.demo.com_443']

# Directed graph: an edge points from a service to something it depends on.
G = nx.DiGraph()

for item in data:
    # Split on the first dot only; everything after it is the dependency.
    service, sep, dependency = item.partition('.')
    if not sep or not dependency:
        # No dependency part: skip instead of creating an empty-named node.
        continue
    # add_edge creates both endpoint nodes when they are not present yet.
    G.add_edge(service, dependency)

# Image resolution.
plt.rcParams['figure.dpi'] = 300
# Figure size.
plt.figure(figsize=(12, 8))
# Node layout.
pos = nx.spring_layout(G, k=0.5)
# Draw nodes, edges and labels.
nx.draw_networkx_nodes(G, pos, node_color='lightblue', node_size=2000)
nx.draw_networkx_edges(G, pos, edge_color='gray')
nx.draw_networkx_labels(G, pos, font_size=8, font_family='sans-serif')
# Title, and hide the axes.
plt.title('Service Dependency Graph')
plt.axis('off')
# Save the picture, then release the figure.
image_path = 'service_dependency_graph.png'
plt.savefig(image_path, bbox_inches='tight')
plt.close()
# -*- coding: utf-8 -*-
import requests
from datetime import datetime, timedelta
import json
import mysql.connector
"""
表结构:
use sbtest;
CREATE TABLE `service_deps_log` (
`id` bigint NOT NULL AUTO_INCREMENT,
`create_time` datetime DEFAULT NULL,
`service_name` varchar(200) DEFAULT NULL,
`dependency_list` text,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci
"""
# SkyWalking GraphQL endpoint and the HTTP headers sent with every request.
skywalking_graphql_url = 'http://192.168.31.181:12800/graphql'
headers = {
    'accept': 'application/json, text/plain, */*',
    'content-type': 'application/json;charset=UTF-8',
}

# Collection window: the ten minutes leading up to "now".
current_time = datetime.now()
start_time = current_time - timedelta(minutes=10)

# Render both ends of the window in two layouts: one with a colon between
# hour and minute, one without.
start_str, end_str = (
    moment.strftime('%Y-%m-%d %H:%M') for moment in (start_time, current_time)
)
start_str2, end_str2 = (
    moment.strftime('%Y-%m-%d %H%M') for moment in (start_time, current_time)
)
def insert_service_log(service_name, dependency_list):
    """Store one row per dependency of a service in ``service_deps_log``.

    Args:
        service_name: Name of the service the dependencies belong to.
        dependency_list: Dependency names, either as a list of strings or as
            the string form of such a list (e.g. ``"['a', 'b']"``).

    Returns:
        None. Failures are reported on stdout and not raised, so a single
        failing insert does not stop the caller's loop.
    """
    # Accept the textual form of a list as well as a real list.
    if isinstance(dependency_list, str):
        import ast  # local import: the module is not imported at file level
        dependency_list = ast.literal_eval(dependency_list)

    # Nothing to record: skip the database round trip entirely.
    if not dependency_list:
        return

    created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    insert_query = (
        "INSERT INTO service_deps_log (create_time, service_name, dependency_list) "
        "VALUES (%s, %s, %s);"
    )
    records = [(created_at, service_name, dep_svc_item) for dep_svc_item in dependency_list]

    connection = None
    try:
        connection = mysql.connector.connect(
            host='192.168.31.181',
            database='sbtest',
            user='sysbench',
            password='sysbench',
        )
        cursor = connection.cursor()
        try:
            cursor.executemany(insert_query, records)
            connection.commit()
            print(f"成功插入 {cursor.rowcount} 条记录")
        finally:
            cursor.close()
    except Exception as e:
        # Best effort: report and carry on with the next service.
        print(f"Error: {e}")
    finally:
        # Always release the connection, including after a failure.
        if connection is not None and connection.is_connected():
            connection.close()
def get_service_list():
    """Query SkyWalking for the services seen in the collection window.

    Returns:
        str: The GraphQL response encoded as JSON text (decode it with
        ``json.loads``). Services are requested under ``data.services`` with
        the aliases ``key`` (id), ``label`` (name) and ``group``.
    """
    query = """
    query queryServices($duration: Duration!, $keyword: String!) {
        services: getAllServices(duration: $duration, group: $keyword) {
            key: id
            label: name
            group
        }
    }
    """
    variables = {
        "duration": {
            # With step MINUTE the time is sent as "yyyy-MM-dd HHmm" (no
            # colon), the same layout the topology query uses.
            "start": start_str2,
            "end": end_str2,
            "step": "MINUTE",
        },
        "keyword": "",
    }
    payload = {"query": query, "variables": variables}
    # A timeout keeps the collector from hanging on an unresponsive server.
    response = requests.post(
        skywalking_graphql_url, headers=headers, json=payload, timeout=10
    )
    return json.dumps(response.json())
def get_service_topo(key_label_pairs):
    """Return the names of the topology nodes reported for one service.

    Args:
        key_label_pairs: One service entry; its ``"key"`` item is sent as the
            ``serviceId`` of the topology query.

    Returns:
        list: Node names from ``data.topo.nodes`` with the ``"User"`` node
        left out. An empty list when the response carries no topology.
    """
    service_id = key_label_pairs["key"]
    query = """
    query queryServiceTopo($duration: Duration!, $serviceId: ID!) {
        topo: getServiceTopology(duration: $duration, serviceId: $serviceId) {
            nodes {
                id
                name
                type
                isReal
            }
        }
    }
    """
    variables = {
        "serviceId": service_id,
        "duration": {
            "start": start_str2,
            "end": end_str2,
            "step": "MINUTE",
        },
    }
    payload = {"query": query, "variables": variables}
    # A timeout keeps the collector from hanging on an unresponsive server.
    response = requests.post(
        skywalking_graphql_url, headers=headers, json=payload, timeout=10
    )
    res = response.json()

    # "data" or "topo" can be missing or null; treat that as "no nodes"
    # so the function always returns a list.
    topo = ((res or {}).get("data") or {}).get("topo") or {}
    nodes = topo.get("nodes") or []
    names = [node["name"] for node in nodes if node["name"] != "User"]
    print(f"依赖的服务清单: {names}")
    return names
# Collect the dependency list of every service and store it.
service_list = get_service_list()
# get_service_list() returns JSON text. json.loads parses it safely, whereas
# eval() fails on the JSON literals true/false/null and would execute
# arbitrary expressions found in the response.
service_list_dict = json.loads(service_list)["data"]["services"]
for service in service_list_dict:
    dependency_list_str = get_service_topo(service)
    insert_service_log(service['label'], dependency_list_str)
略,目前grafana插件不支持类似neo4j的WebUI展示,只能用table方式展示。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。