在日常访问RGW的过程中,一般会在RGW前端架设Nginx,并通过Nginx日志来统计或分析用户请求。但默认的Nginx日志字段中不包含bucket_name,那么如何借助ELK组件对Nginx日志字段进行补充,加上bucket_name字段呢?于是就有了下面的内容。
通常访问一个S3资源时,可以使用以下两种URL格式:
虚拟主机格式(virtual-hosted style)-> GET http://bucket_name.s3.demo.local/objectname
相对路径格式(path-style)-> GET http://s3.demo.local/bucket_name/objectname
而在Nginx日志中可以通过http_host和request_uri字段来判断请求具体属于上面的哪种类型。
Nginx的Access Log以JSON格式进行保存,然后通过Filebeat推送到Kafka,之后再由Logstash拉取数据并处理后存储到ES。
为缩短篇幅,下面的操作省略了Kafka和ES,直接由Filebeat将日志发送到Logstash进行处理。
nginx日志配置如下
# JSON access-log format: one JSON object per request, read by Filebeat.
# The format is written as adjacent quoted strings, which nginx concatenates
# into a single line. A single quoted string spanning several lines would
# carry its line breaks into every log entry and break line-oriented JSON
# decoding downstream.
# status, body_bytes_sent and request_length are emitted unquoted, i.e. as
# JSON numbers; every other value is emitted as a JSON string.
# NOTE(review): the key "upsteam_response_time" (sic) is kept unchanged because
# the sample Logstash output in this article uses the same spelling.
# NOTE(review): values are not JSON-escaped here; newer nginx releases accept
# `log_format json escape=json ...` - confirm the nginx version and the effect
# on empty values before enabling it.
log_format json '{"scheme":"$scheme","http_host":"$http_host",'
                '"remote_addr":"$remote_addr","server_addr":"$server_addr",'
                '"time_local":"[$time_local]","request":"$request","status":$status,'
                '"body_bytes_sent":$body_bytes_sent,"http_referer":"$http_referer",'
                '"http_user_agent":"$http_user_agent","upstream_addr":"$upstream_addr",'
                '"upsteam_response_time":"$upstream_response_time","request_time":"$request_time",'
                '"http_x_forwarded_for":"$http_x_forwarded_for","content_length":"$content_length",'
                '"request_length":$request_length,"request_method":"$request_method",'
                '"server_protocol":"$server_protocol","request_uri":"$request_uri"}';
access_log /var/log/nginx/access.log json;
[root@demo cephuser]# cat /etc/filebeat/filebeat.yml
# Filebeat configuration: ships the nginx access and error logs to Logstash.
# Nesting is significant in YAML; per-prospector options are indented under
# their "- type: log" entry.
filebeat.prospectors:
  # nginx access log: each line is a JSON object.
  - type: log
    enabled: true
    paths:
      - /var/log/nginx/access.log
    # Decode each line as JSON and place the keys at the top level of the event.
    json.keys_under_root: true
    json.overwrite_keys: true
    tags: ["nginx-access"]  # tag used later to select these events
    # NOTE(review): document_type is reported as removed in Filebeat 6.0, and the
    # sample output shows beat version 6.2.4 - confirm this option is still accepted.
    document_type: nginx-access
    fields_under_root: false
    fields:
      kafka_topic: "nginx-access"  # routes each log type to its own Kafka topic
  # nginx error log: plain text, no JSON decoding.
  - type: log
    enabled: true
    paths:
      - /var/log/nginx/error.log
    document_type: nginx-error
    tags: ["nginx-error"]
    fields_under_root: false
    fields:
      kafka_topic: "nginx-error"

# Drop the "offset" field from every event before it is sent.
processors:
  - drop_fields:
      fields: ["offset"]

output.logstash:
  hosts: ["127.0.0.1:5044"]

# Kafka output, disabled in this shortened walkthrough.
#output.kafka:
#  hosts: ["mybroker:9092"]
#  topic: '%{[fields.kafka_topic]}'
以endpoint -> demo.local为例
[root@demo cephuser]# cat /etc/logstash/conf.d/logstash.conf
# Pipeline: receive events from Filebeat, add a bucket_name field to nginx
# access-log events, and print the result to stdout.
input {
  beats { port => 5044 }
}

filter {
  # Only access-log events are processed.
  if "nginx-access" in [tags] {
    # Host equal to the endpoint itself => path-style request
    # (/bucket_name/objectname). An exact comparison is used because the
    # earlier pattern "^demo.local$" left the dot unescaped and therefore
    # also matched hosts such as "demoXlocal".
    if [http_host] == "demo.local" {
      mutate {
        # request_uri is replaced by an array of its "/"-separated parts;
        # index 0 is the empty string before the leading "/", index 1 the bucket.
        split     => { "request_uri" => "/" }
        add_field => { "bucket_name" => "%{[request_uri][1]}" }
      }
    } else {
      # Any other host is treated as virtual-hosted style
      # (bucket_name.demo.local): the first "."-separated part is the bucket.
      mutate {
        # http_host is replaced by an array of its "."-separated parts.
        split     => { "http_host" => "." }
        add_field => { "bucket_name" => "%{[http_host][0]}" }
      }
    }
  }
}

output {
  stdout { codec => rubydebug }
}
模拟客户端virtual-hosted style方式访问
curl bucket.demo.local/newobj/file/name
输出如下
[root@demo cephuser]# /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/logstash.conf
...
{
"body_bytes_sent" => 3650,
"beat" => {
"name" => "demo.local",
"version" => "6.2.4",
"hostname" => "demo.local"
},
"prospector" => {
"type" => "log"
},
"upstream_addr" => "-",
"time_local" => "[08/May/2018:03:21:25 -0400]",
"request_length" => 97,
"content_length" => 0,
"@version" => "1",
"bucket_name" => "bucket", #加上了
"request_method" => "GET",
"host" => "demo.local",
"status" => 404,
"fields" => {
"kafka_topic" => "nginx-access"
},
"tags" => [
[0] "nginx-access",
[1] "beats_input_raw_event"
],
"http_host" => [
[0] "bucket",
[1] "demo",
[2] "local"
],
"request_uri" => "/newobj/file/name",
"source" => "/var/log/nginx/access.log",
"http_referer" => "-",
"http_x_forwarded_for" => "-",
"server_protocol" => "HTTP/1.1",
"server_addr" => "127.0.0.1",
"request" => "GET /newobj/file/name HTTP/1.1",
"@timestamp" => 2018-05-08T07:21:31.364Z,
"request_time" => "0.000",
"scheme" => "http",
"remote_addr" => "127.0.0.1",
"http_user_agent" => "curl/7.29.0",
"upsteam_response_time" => 0.0
}
模拟客户端path style方式访问
curl demo.local/newbucket/newobj/file/name
输出如下
{
"body_bytes_sent" => 3650,
"beat" => {
"name" => "demo.local",
"version" => "6.2.4",
"hostname" => "demo.local"
},
"upstream_addr" => "-",
"prospector" => {
"type" => "log"
},
"time_local" => "[08/May/2018:03:25:01 -0400]",
"request_length" => 100,
"content_length" => 0,
"@version" => "1",
"bucket_name" => "newbucket",#加上了
"request_method" => "GET",
"host" => "demo.local",
"status" => 404,
"fields" => {
"kafka_topic" => "nginx-access"
},
"tags" => [
[0] "nginx-access",
[1] "beats_input_raw_event"
],
"http_host" => "demo.local",
"http_referer" => "-",
"source" => "/var/log/nginx/access.log",
"request_uri" => [
[0] "",
[1] "newbucket",
[2] "newobj",
[3] "file",
[4] "name"
],
"http_x_forwarded_for" => "-",
"server_protocol" => "HTTP/1.1",
"server_addr" => "127.0.0.1",
"request" => "GET /newbucket/newobj/file/name HTTP/1.1",
"@timestamp" => 2018-05-08T07:25:06.379Z,
"scheme" => "http",
"request_time" => "0.000",
"http_user_agent" => "curl/7.29.0",
"remote_addr" => "127.0.0.1",
"upsteam_response_time" => 0.0
}