共计 4743 个字符,预计需要花费 12 分钟才能阅读完成。
导读:今天换了另外一种方式来实现,直接去绘制渲染报警图表,然后上传到对象存储中保存起来,在钉钉中就可以直接展示了。Promoter 就是这个方案的一个实现。
之前用 Python 实现了一个非常简陋的 AlertManager 的钉钉接收器,一直想在钉钉的消息通知中将当前报警图表也展示出来,这样显然对用户来说更加友好。之前想的思路是通过爬虫的方式去 Prometheus 页面将 Graph 图形截图保存下来,该方式理论上确实是可行的,但是这种方式不稳定因素较多,而且会占用大量的资源。
今天换了另外一种方式来实现,直接去绘制渲染报警图表,然后上传到对象存储中保存起来,在钉钉中就可以直接展示了,Promoter 就是这个方案的一个实现,支持在消息通知中展示实时报警图表,效果图如下所示:
目前是将报警数据渲染成图片后上传到 S3 对象存储,所以需要配置一个对象存储(阿里云 OSS 也可以)。此外,消息通知的展示样式支持模板定制。
默认模板位于 template/default.tmpl,可以根据自己需求定制:
{{define "__subject"}}[{{.Status | toUpper}}{{if eq .Status "firing"}}:{{.Alerts.Firing | len}}{{end}}] {{.GroupLabels.SortedPairs.Values | join " "}} {{if gt (len .CommonLabels) (len .GroupLabels) }}({{with .CommonLabels.Remove .GroupLabels.Names}}{{.Values | join " "}}{{end}}){{end}}{{end}}
{{define "__alertmanagerURL"}}{{.ExternalURL}}/#/alerts?receiver={{.Receiver}}{{end}}
{{define "default.__text_alert_list"}}{{range .}}
### {{.Annotations.summary}}
**详情:** {{.Annotations.description}}
{{range .Images}}
**条件:** `{{.Title}}`

{{- end}}
**标签:**
{{range .Labels.SortedPairs}}{{if and (ne (.Name) "severity") (ne (.Name) "summary") }}> - {{.Name}}: {{.Value | markdown | html}}
{{end}}{{end}}
{{end}}{{end}}
{{/* Default */}}
{{define "default.title"}}{{template "__subject" .}}{{end}}
{{define "default.content"}}
{{if gt (len .Alerts.Firing) 0 -}}
#### **{{.Alerts.Firing | len}} 条报警**
{{template "default.__text_alert_list" .Alerts.Firing}}
{{range .AtMobiles}}@{{.}}{{end}}
{{- end}}
{{if gt (len .Alerts.Resolved) 0 -}}
#### **{{.Alerts.Resolved | len}} 条报警恢复**
{{template "default.__text_alert_list" .Alerts.Resolved}}
{{range .AtMobiles}}@{{.}}{{end}}
{{- end}}
{{- end}}
默认配置文件如下所示,放置在 /etc/promoter/config.yaml:
debug: true
http_port: 8080
timeout: 5s
prometheus_url: # Prometheus 的地址
metric_resolution: 100
s3:
  access_key:
  secret_key:
  endpoint: oss-cn-beijing.aliyuncs.com
  region: cn-beijing
  bucket:
dingtalk:
  url: https://oapi.dingtalk.com/robot/send?access_token=
  secret: # secret for signature
可以直接使用 Docker 镜像 cnych/promoter:v0.1.1 部署,在 Kubernetes 中部署可以直接参考 deploy/kubernetes/promoter.yaml。
启动完成后在 AlertManager 配置中指定 Webhook 地址即可:
route:
  group_by: ['alertname', 'cluster']
  group_wait: 30s
  group_interval: 2m
  repeat_interval: 1h
  receiver: webhook
receivers:
- name: 'webhook'
  webhook_configs:
  - url: 'http://promoter.kube-mon.svc.cluster.local:8080/webhook' # 配置 promoter 的 webhook 接口
    send_resolved: true
该项目采用 golang 实现,Webhook 的实现很简单,这里的核心部分是如何渲染监控图表,核心方式是通过 Prometheus 的 API 接口来获取查询的指标数据:
func Metrics(server, query string, queryTime time.Time, duration, step time.Duration) (promModel.Matrix, error) {client, err := prometheus.NewClient(prometheus.Config{Address: server}) | |
if err != nil {return nil, fmt.Errorf("failed to create Prometheus client: %v", err) | |
} | |
api := prometheusApi.NewAPI(client) | |
value, _, err := api.QueryRange(context.Background(), query, prometheusApi.Range{Start: queryTime.Add(-duration), | |
End: queryTime, | |
Step: duration / step, | |
}) | |
if err != nil {return nil, fmt.Errorf("failed to query Prometheus: %v", err) | |
} | |
metrics, ok := value.(promModel.Matrix) | |
if !ok {return nil, fmt.Errorf("unsupported result format: %s", value.Type().String()) | |
} | |
return metrics, nil | |
} |
然后将获取到的指标绘制出来,图形绘制是使用 gonum.org/v1/plot 这个包来实现的:
// PlotMetric draws every series in metrics as a line on a new gonum plot.
// It is declared to return an io.WriterTo and an error; every failure path
// visible here returns a nil writer together with an error.
//
// NOTE(review): this listing is damaged at the line starting with
// `if direction ==`. The remainder of PlotMetric and the head of the
// drawLine helper are missing, so how level and direction are used, and what
// is finally returned, cannot be confirmed from the text shown here.
func PlotMetric(metrics promModel.Matrix, level float64, direction string) (io.WriterTo, error) {p, err := plot.New() | |
if err != nil {return nil, fmt.Errorf("failed to create new plot: %v", err) | |
} | |
// Small font, used below for the tick labels and the legend.
textFont, err := vg.MakeFont("Helvetica", 3*vg.Millimeter) | |
if err != nil {return nil, fmt.Errorf("failed to load font: %v", err) | |
} | |
// Larger font, used only by evalTextStyle.
evalTextFont, err := vg.MakeFont("Helvetica", 5*vg.Millimeter) | |
if err != nil {return nil, fmt.Errorf("failed to load font: %v", err) | |
} | |
// Text style with alpha 150 and zero RGB, aligned right/bottom. Its use is
// not visible in this listing.
evalTextStyle := draw.TextStyle{Color: color.NRGBA{A: 150}, | |
Font: evalTextFont, | |
XAlign: draw.XRight, | |
YAlign: draw.YBottom, | |
} | |
// X values are Unix timestamps (see the point construction below), so the
// X axis ticks are formatted as clock times.
p.X.Tick.Marker = plot.TimeTicks{Format: "15:04:05"} | |
p.X.Tick.Label.Font = textFont | |
p.Y.Tick.Label.Font = textFont | |
p.Legend.Font = textFont | |
p.Legend.Top = true | |
p.Legend.YOffs = 15 * vg.Millimeter | |
// Color palette for drawing lines | |
paletteSize := 8 | |
palette, err := brewer.GetPalette(brewer.TypeAny, "Dark2", paletteSize) | |
if err != nil {return nil, fmt.Errorf("failed to get color palette: %v", err) | |
} | |
colors := palette.Colors() | |
// Holds the most recently parsed sample value across all series.
var lastEvalValue float64 | |
// data collects the points of the current unbroken segment of a series.
for s, sample := range metrics {data := make(plotter.XYs, 0) | |
// A NaN sample ends the current segment: what has been collected so far is
// handed to drawLine and a fresh segment is started.
for _, v := range sample.Values {fs := v.Value.String() | |
if fs == "NaN" {_, err := drawLine(data, colors, s, paletteSize, p, metrics, sample) | |
if err != nil {return nil, err} | |
data = make(plotter.XYs, 0) | |
continue | |
} | |
f, err := strconv.ParseFloat(fs, 64) | |
if err != nil {return nil, fmt.Errorf("sample value not float: %s", v.Value.String()) | |
} | |
// X is the sample timestamp in Unix seconds, Y the parsed value.
data = append(data, plotter.XY{X: float64(v.Timestamp.Unix()), Y: f}) | |
lastEvalValue = f | |
} | |
// Draw whatever remains after the last sample of the series.
_, err := drawLine(data, colors, s, paletteSize, p, metrics, sample) | |
if err != nil {return nil, err} | |
} | |
var polygonPoints plotter.XYs | |
// NOTE(review): the next line is corrupted. Text between a "<" and a later
// ">" appears to have been stripped during extraction, fusing the start of a
// condition in PlotMetric with the tail of a condition inside drawLine.
// Everything below belongs to drawLine, whose signature is not visible.
if direction == " 0 {l, err = plotter.NewLine(data) | |
if err != nil {return &plotter.Line{}, fmt.Errorf("failed to create line: %v", err) | |
} | |
l.LineStyle.Width = vg.Points(1) | |
// Cycle through the palette when there are more series than colors.
l.LineStyle.Color = colors[s%paletteSize] | |
p.Add(l) | |
// A legend entry is added only when more than one series is plotted. The
// entry name is the first capture group of labelText applied to the metric's
// string form; labelText is defined outside this listing (presumably a
// compiled regexp — confirm).
if len(metrics) > 1 {m := labelText.FindStringSubmatch(sample.Metric.String()) | |
if m != nil {p.Legend.Add(m[1], l) | |
} | |
} | |
} | |
return l, nil | |
} |
