Grafana Beyla 是一款基于 eBPF 的开源工具,可为 Go、C/C++、Rust、Python、Ruby、Java、NodeJS、.NET 等语言编写的应用程序提供可观测性,本文介绍 Beyla 的入门使用。
官方文档要求 Linux 内核版本不低于 5.8。本文测试环境为 TencentOS 4,内核版本 6.6.47-12.tl4.x86_64。
https://github.com/grafana/beyla
Beyla 既可以在支持 Docker 的容器环境中编译,也可以在非容器环境中编译,这里以非容器环境为例说明:
# Download the Go 1.24.1 toolchain for linux/amd64.
wget https://go.dev/dl/go1.24.1.linux-amd64.tar.gz
# Remove any previous installation, then unpack the new one. Both steps write
# under /usr/local, so both need root privileges.
sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf go1.24.1.linux-amd64.tar.gz
# Persist the toolchain and $GOPATH/bin on PATH for future shells.
echo 'export PATH=$PATH:/usr/local/go/bin' >> $HOME/.bashrc
echo 'export GOPATH=$HOME/go' >> $HOME/.bashrc
echo 'export PATH=$PATH:$GOPATH/bin' >> $HOME/.bashrc
source ~/.bashrc
2. 下载编译beyla
git clone https://github.com/grafana/beyla.git
cd beyla
make prereqs
make generate
make compile
3. 代码修改后编译
修改代码后需要这样编译:
# 需要先执行这一步:它会清理 beyla/pkg/internal/ebpf/gotracer/ 下上一次编译生成的文件。
# 如果跳过这一步,再执行 make generate 时不会重新编译修改过的代码:
make clean-ebpf-generated-files
编译bpf代码将c代码转为go程序
make generate
编译生成可执行文件
make compile
1. 运行测试程序:
运行server端测试代码:
go run ./example-http-service.go
package main
import (
"fmt"
"io"
"log"
"math/rand"
"net/http"
"time"
)
// handleRequest is the demo endpoint: it waits 500ms and then answers with
// HTTP 200 for roughly 80% of calls and HTTP 500 for the remaining 20%.
//
// A failed response write is logged rather than treated as fatal: it only means
// this one client went away, and the server must keep serving the others.
func handleRequest(rw http.ResponseWriter, _ *http.Request) {
	// Fixed delay. The commented-out line is the randomised alternative:
	// time.Sleep(time.Duration(rand.Float64()*400.0) * time.Millisecond)
	time.Sleep(500 * time.Millisecond)

	status, body := http.StatusOK, "Hello from the example HTTP service.\n"
	if rand.Int31n(100) >= 80 {
		status, body = http.StatusInternalServerError, "Simulating an error response with HTTP status 500.\n"
	}

	rw.WriteHeader(status)
	if _, err := io.WriteString(rw, body); err != nil {
		log.Printf("writing response: %v", err)
	}
}
func main() {
fmt.Println("Listening on http://localhost:8080")
log.Fatal(http.ListenAndServe(":8080", http.HandlerFunc(handleRequest)))
}运行client端测试代码:
go run client-test.go
package main
import (
"fmt"
"io"
"net/http"
"os"
"time"
)
func main() {
const url = "http://localhost:8080"
ticker := time.NewTicker(1 * time.Second)
defer ticker.Stop()
for range ticker.C {
func() {
resp, err := http.Get(url)
if err != nil {
fmt.Fprintf(os.Stderr, "请求失败: %v\n", err)
return
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
fmt.Fprintf(os.Stderr, "读取响应失败: %v\n", err)
return
}
fmt.Printf("%s\n", body)
}()
}
}2. 配置beyla启动参数(可根据实际情况选配)
# Print captured traces as text.
export BEYLA_TRACE_PRINTER=text
# Select the service by the port it listens on; can be combined with
# example-http-service below or used on its own.
export BEYLA_OPEN_PORT=8080
# Name of the process to monitor.
export BEYLA_EXECUTABLE_NAME=example-http-service
# Export metrics in Prometheus format on port 9400.
export BEYLA_PROMETHEUS_PORT=9400
# Enable Beyla's debug-level log output.
export BEYLA_LOG_LEVEL=debug
3. 运行beyla
cd beyla
# bin/beyla
time=2025-04-07T17:18:46.641+08:00 level=INFO msg="Grafana Beyla" Version=main Revision=74e5d63d "OpenTelemetry SDK Version"=1.34.0
time=2025-04-07T17:18:47.643+08:00 level=INFO msg="got host ID" component=ContextInfo func=fetchHostID fetcher=local hostID=0024bbb2bc9942a3a879044f450c7e16
time=2025-04-07T17:18:47.643+08:00 level=INFO msg="starting Beyla in Application Observability mode"
time=2025-04-07T17:18:47.644+08:00 level=INFO msg="using hostname" component=traces.ReadDecorator function=instance_ID_hostNamePIDDecorator hostname=VM-1-4-tencentos
time=2025-04-07T17:18:47.644+08:00 level=INFO msg="Starting main node" component=beyla.Instrumenter
time=2025-04-07T17:18:47.644+08:00 level=INFO msg="opening prometheus scrape endpoint" component=connector.PrometheusManager port=9400 path=/metrics
time=2025-04-07T17:18:48.953+08:00 level=INFO msg="instrumenting process" component=discover.TraceAttacher cmd=/tmp/go-build2817812094/b001/exe/example-http-service pid=256806 ino=43194975 type=go
2025-04-02 15:31:53.4233153 (0s[0s]) ProcessAlive 0 [ as :0]->[ as :0] size:0B svc=[example-http-service go] traceparent=[]
2025-04-07 17:18:49.4751849 (500.681372ms[500.653533ms]) HTTP 200 GET / [::1 as ::1:56306]->[::1 as ::1:8080] size:0B svc=[example-http-service go] traceparent=[00-4d364448d7c243a8d25d0c024435bd66-884aa96ca4d9bf6b[0000000000000000]-01]
2025-04-07 17:18:50.4751850 (500.665292ms[500.633923ms]) HTTP 200 GET / [::1 as ::1:56306]->[::1 as ::1:8080] size:0B svc=[example-http-service go] traceparent=[00-87a848263f4c8555cfe924221b910e73-eba9b7b87967eefb[0000000000000000]-01]
2025-04-07 17:18:51.4751851 (500.676621ms[500.649722ms]) HTTP 200 GET / [::1 as ::1:56306]->[::1 as ::1:8080] size:0B svc=[example-http-service go] traceparent=[00-b11677a89d2dab51301b8345223b4ecc-81e233875bd57b86[0000000000000000]-01]
# curl http://localhost:9400/metrics | tail -20
% Total % Received % Xferd Average Speed Time Time Time Current
Dload Upload Total Spent Left Speed
100 19164 0 19164 0 0 20.7M 0 --:--:-- --:--:-- --:--:-- 18.2M
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="0.075"} 0
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="0.1"} 0
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="0.25"} 0
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="0.5"} 0
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="0.75"} 25
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="1"} 25
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="2.5"} 25
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="5"} 25
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="7.5"} 25
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="10"} 25
http_server_request_duration_seconds_bucket{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace="",le="+Inf"} 25
http_server_request_duration_seconds_sum{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace=""} 12.516538845000001
http_server_request_duration_seconds_count{http_request_method="GET",http_response_status_code="500",http_route="/",instance="VM-1-4-tencentos:274459",job="example-http-service",server_address="::1",server_port="8080",service_name="example-http-service",service_namespace=""} 25
2. 配置prometheus自定义监控:


job_name: example-job-name
metrics_path: /metrics
cvm_sd_configs:
- region: ap-hongkong
  ports:
  - 9400
  filters:
  - name: instance-id
    values:
    - ins-jyi92lja # 替换「ins-xxxxxxxx」为实际 CVM 实例 ID
relabel_configs:
- source_labels: [__meta_cvm_instance_state]
  regex: RUNNING
  action: keep
- regex: __meta_cvm_tag_(.*)
  replacement: $1
  action: labelmap
- source_labels: [__meta_cvm_region]
  target_label: region
  action: replace
3. 定义预聚合指标:


---
name: http_metrics
interval: 1m
rules:
- record: http_server_request_average_duration_seconds:1m
  expr: |
    # 分子:总耗时速率
    sum by (job, instance) (
      rate(http_server_request_duration_seconds_sum[1m])  # 关键点:扩展窗口
    )
    /
    # 分母:请求次数速率
    (sum by (job, instance) (
      rate(http_server_request_duration_seconds_count[1m])
    ) + 1e-10)  # 防止除零错误
4. 查看基于http_server_request_duration_seconds_sum和http_server_request_duration_seconds_count定义的预聚合指标

# 用户态代码调试:日志输出到标准输出
export BEYLA_LOG_LEVEL=debug
# eBPF 内核态代码调试:日志会输出到标准输出;如需单独查看内核部分日志,可执行 cat /sys/kernel/debug/tracing/trace
export BEYLA_BPF_DEBUG=1
beyla项目底层基于eBPF技术实现,代码架构分为内核态部分(使用C实现)以及用户态部分(使用Go语言实现)。
这里通过介绍关键函数,帮助快速了解beyla工具的内核态部分和用户态部分是如何协作的。
1.eBpf内核部分
beyla ebpf内核部分代码在beyla/bpf路径下,beyla通过kprobe挂载内核的tcp_cleanup_rbuf/tcp_recvmsg/tcp_sendmsg等函数,获取报文数据进行解析。比如在接收方向,kprobe/tcp_cleanup_rbuf被触发时会调用return_recvmsg对数据进行处理,再通过公共函数handle_buf_with_args进一步解析。

handle_buf_with_args判断是否为http报文,如果是http报文handle_buf_with_args通过尾调用bpf_tail_call(ctx, &jump_table, k_tail_protocol_http);最终调用SEC("kprobe/http") int beyla_protocol_http(void *ctx)进行解析。

如果非http报文handle_buf_with_args通过尾调用bpf_tail_call(ctx, &jump_table, k_tail_protocol_tcp);(对应beyla_protocol_tcp),最终在handle_unknown_tcp_connection通过分别统计报文接收和发送的时间戳计算出请求到响应的耗时,并将时间戳以及请求和发送报文的内容通过bpf_ringbuf_submit传送到用户态进行加工处理。
// k_tail_protocol_tcp
// Tail-call target reached through jump_table for payloads that
// handle_buf_with_args did not classify as HTTP. It forwards the call
// arguments to handle_unknown_tcp_connection and always returns 0.
SEC("kprobe/tcp")
int beyla_protocol_tcp(void *ctx) {
    // NOTE(review): protocol_args() presumably returns the arguments saved
    // before the tail call - confirm against its definition.
    call_protocol_args_t *args = protocol_args();
    if (!args) {
        // Nothing to process without arguments.
        return 0;
    }
    handle_unknown_tcp_connection(&args->pid_conn,
                                  (void *)args->u_buf,
                                  args->bytes_len,
                                  args->direction,
                                  args->ssl,
                                  args->orig_dport);
    return 0;
}
jump_table的handler处理函数在./pkg/internal/ebpf/generictracer/generictracer.go的func (p *Tracer) SetupTailCalls()函数初始化,这里k_tail_protocol_tcp对应p.bpfObjects.BeylaProtocolTcp, BeylaProtocolTcp在/pkg/internal/ebpf/generictracer/bpf_tp_x86_bpfel.go文件里type bpf_tpProgramSpecs struct定义为*ebpf.Program ebpf:"beyla_protocol_tcp",也就是beyla_protocol_tcp函数
2. ebpf用户态部分:
cmd/beyla/main.go是beyla的用户态程序入口函数,执行components.RunBeyla调用./pkg/components/beyla.go文件的RunBeyla函数,
RunBeyla起一个常驻协程执行setupAppO11y,setupAppO11y执行instr.ReadAndForward()调用./pkg/internal/appolly/appolly.go的ReadAndForward()函数
执行 bp.Run(i.ctx)。
用户态协议解析在./pkg/internal/ebpf/common/tcp_detect_transform.go 文件的ReadTCPRequestIntoSpan函数
做判断协议类型,部分代码片段:
./pkg/internal/ebpf/common/common.go ReadBPFTraceAsSpan
->./pkg/internal/ebpf/common/tcp_detect_transform.go ReadTCPRequestIntoSpan

beyla支持如下应用层协议,不同的协议实现上有较大不同
这里以redis协议为例介绍beyla如何实现解析redis协议:
func ReadTCPRequestIntoSpan(cfg *config.EBPFTracer, record *ringbuf.Record,
filter ServiceFilter) (request.Span, bool, error) {
b := event.Buf[:l]//存放的request数据包payload
.....
//event.Rbuf[:rl]存放respond的数据包payload
switch {//isRedis(b) 判断request报文的payload是否是redis request
// isRedis(event.Rbuf[:rl] 判断respond报文的payload是否是redis respond
case isRedis(b) && isRedis(event.Rbuf[:rl]):
op, text, ok := parseRedisRequest(string(b))
if ok {
var status int
if op == "" {
op, text, ok = parseRedisRequest(string(event.Rbuf[:rl]))
if !ok || op == "" {
return request.Span{}, true, nil // ignore if we couldn't parse it
}
// We've caught the event reversed in the middle of communication, let's
// reverse the event
reverseTCPEvent(&event)
status = redisStatus(b)
} else {
status = redisStatus(event.Rbuf[:rl])
}
return TCPToRedisToSpan(&event, op, text, status), false, nil
}
default:
}
const minRedisFrameLen = 3
// isRedis reports whether buf looks like a RESP (Redis protocol) frame.
// Payloads shorter than minRedisFrameLen bytes are rejected outright; anything
// longer is classified by isRedisOp.
func isRedis(buf []uint8) bool {
	return len(buf) >= minRedisFrameLen && isRedisOp(buf)
}
// nolint:cyclop
func isRedisOp(buf []uint8) bool {
if len(buf) == 0 {
return false
}
c := buf[0]//取payload第一个字节
switch c {
case '+'://第一个字节‘+’表示简单字符串,后面跟英文字符,比如"+OK\r\n"
return crlfTerminatedMatch(buf[1:], func(c uint8) bool {
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '.' || c == ' ' || c == '-' || c == '_'
})
case '-': Errors,第一个字节为‘-’表示错误响应
return isRedisError(buf[1:])
// 字符':' 表示整数
// 字符'$' 表示批量字符串:Bulk Strings
// 字符'*' 表示数组Arrays
case ':', '$', '*'://第一个字节如果是这三个数据类型,后面必须跟的是数字和‘-’字符
return crlfTerminatedMatch(buf[1:], func(c uint8) bool {
return (c >= '0' && c <= '9') || c == '-'
})
}
return false
}
// isRedisError reports whether buf, the payload that follows the leading '-'
// of a RESP error reply, starts with any one of the error prefixes listed
// below. Each prefix includes its trailing space.
func isRedisError(buf []uint8) bool {
	return bytes.HasPrefix(buf, []byte("ERR ")) ||
		bytes.HasPrefix(buf, []byte("WRONGTYPE ")) ||
		bytes.HasPrefix(buf, []byte("MOVED ")) ||
		bytes.HasPrefix(buf, []byte("ASK ")) ||
		bytes.HasPrefix(buf, []byte("BUSY ")) ||
		bytes.HasPrefix(buf, []byte("NOSCRIPT ")) ||
		bytes.HasPrefix(buf, []byte("CLUSTERDOWN "))
}
// crlfTerminatedMatch reports whether buf consists of zero or more bytes
// accepted by matches, immediately followed by "\r\n". Any bytes after that
// first CRLF are not inspected.
func crlfTerminatedMatch(buf []uint8, matches func(c uint8) bool) bool {
	cr := false
	i := 0
	for ; i < len(buf); i++ {
		c := buf[i]
		if matches(c) {
			continue
		}
		// Every RESP element ends with \r\n, so the first non-matching byte
		// must be the '\r' that ends the element.
		if c == '\r' {
			cr = true
			break
		}
		return false
	}
	// No '\r' found, or it is the last byte and no '\n' can follow.
	if !cr || i >= len(buf)-1 {
		return false
	}
	return buf[i+1] == '\n' // the element must end with '\n'
}

// isValidRedisChar reports whether c is an ASCII letter, a digit, or one of
// '.', ' ', '-' and '_'.
func isValidRedisChar(c byte) bool {
	return (c >= 'a' && c <= 'z') ||
		(c >= 'A' && c <= 'Z') ||
		(c >= '0' && c <= '9') ||
		c == '.' || c == ' ' || c == '-' || c == '_'
}
func parseRedisRequest(buf string) (string, string, bool) {
lines := strings.Split(buf, "\r\n") //将输入按 Redis 协议的分隔符 \r\n 分割
if len(lines) < 2 || len(lines[0]) == 0 {//检查最小长度要求(至少2行)和首行非空
return "", "", false
}
//在调用parseRedisRequest解析请求前,已经通过isRedisOp判断第一行是否是redis协议,座椅这里只需要通过第一行的第一个字符'*'来区分是redis请求还是响应
// It's not a command, something else?
if lines[0][0] != '*' { //检查首字符是否为 *(Redis 数组格式)不是数组格式直接返回成功(但空操作),可能是响应而非请求
return "", "", true
}
op := "" //用于保存Redis命令(如 GET/SET)
text := "" //构建完整命令字符串
read := false //状态标志(是否准备读取值)
// Skip the first line
for _, l := range lines[1:] { //redis请求从第二行开始解析,第一行只有第一个字符的'*'说明是redis请求
if len(l) == 0 {
continue
}
if !read {
if isRedisOp([]uint8(l + "\r\n")) {//当 read=false 时,检测当前行是否是 Redis 操作符行,比如$
read = true
} else {
break
}
} else {
if isRedisOp([]uint8(l + "\r\n")) {//使用 isRedisOp 函数检测 $(批量字符串)、*(数组)等前缀
text += "; "//检测到新的操作符行:添加命令分隔符 ';'
continue
}
if !isValidRedisChar(l[0]) {//读到非法的redis字符直接跳出循环
break
}
if op == "" {//当op未赋值时说明for循环第一次执行到这,该变量用于保存redis命令比如get/set
op = l
}
text += l + " "// text用于保存完整的命令字符串,比如set key val
read = false //重置 read 状态准备下一个操作符
}
}
举例:
//单个命令*2\r\n$3\r\nGET\r\n$5\r\nmykey\r\n 解析结果text变量内容为 text = "GET mykey " 变量op = "GET"
//流水线命令*1\r\n$4\r\nPING\r\n*2\r\n$3\r\nGET\r\n$3\r\nkey\r\n 解析结果text变量内容为 text = "PING ; GET key " 变量op = "PING"
return op, text, true
}使用redis简单测试下beyla的解析内容:
redis-cli执行set
127.0.0.1:6666> set abc hello
OK
beyla输出redis内容格式:
ubuf=[*3 $3 set $3 abc $5 hello ], pid=3373114, len=33
[+OK ], pid=3373114, len=5
redis-cli执行get请求
127.0.0.1:6666> get abc
"hello"
beyla输出redis内容格式:
[*2 $3 get $3 abc
], pid=3373114, len=22
[$5 hello
], pid=3373114, len=11
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。