Grafana可视化
📋 概述
Grafana是一个开源的监控和可观测性平台,提供强大的数据可视化功能。它可以连接多种数据源,创建丰富的仪表板,并支持告警和团队协作。
🎯 学习目标
- 掌握Grafana的核心功能和概念
- 学会创建和配置仪表板
- 了解查询编辑器和可视化选项
- 掌握告警和通知配置
📚 Grafana核心概念
数据源(Data Sources)
Grafana支持多种数据源,包括Prometheus、InfluxDB、Elasticsearch等。
json
{
  "name": "Prometheus",
  "type": "prometheus",
  "url": "http://localhost:9090",
  "access": "proxy",
  "basicAuth": false,
  "isDefault": true
}
仪表板(Dashboards)
包含一个或多个面板的可视化界面。
面板(Panels)
仪表板中的可视化组件,如图表、表格、单值显示等。
查询(Queries)
从数据源获取数据的表达式。
🛠 Node.js应用仪表板配置
完整的仪表板JSON配置
json
{
  "dashboard": {
    "id": null,
    "title": "Node.js Application Dashboard",
    "tags": ["nodejs", "monitoring"],
    "timezone": "browser",
    "refresh": "30s",
    "time": {
      "from": "now-1h",
      "to": "now"
    },
    "panels": [
      {
        "id": 1,
        "title": "Request Rate",
        "type": "graph",
        "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 },
        "targets": [
          {
            "expr": "sum(rate(http_requests_total[5m])) by (method)",
            "legendFormat": "{{method}}",
            "refId": "A"
          }
        ],
        "yaxes": [
          { "format": "short", "label": "Requests/sec", "logBase": 1, "min": 0, "max": null, "show": true },
          { "format": "short", "label": null, "logBase": 1, "min": null, "max": null, "show": true }
        ],
        "legend": {
          "show": true,
          "values": false,
          "current": false,
          "max": false,
          "min": false,
          "avg": false
        },
        "tooltip": {
          "shared": true,
          "sort": 0,
          "value_type": "individual"
        }
      },
      {
        "id": 2,
        "title": "Response Time (95th percentile)",
        "type": "graph",
        "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 },
        "targets": [
          {
            "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
            "legendFormat": "95th percentile",
            "refId": "A"
          },
          {
            "expr": "histogram_quantile(0.50, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))",
            "legendFormat": "50th percentile",
            "refId": "B"
          }
        ],
        "yaxes": [
          { "format": "short", "label": "Seconds", "logBase": 1, "min": 0, "max": null, "show": true },
          { "format": "short", "label": null, "logBase": 1, "min": null, "max": null, "show": true }
        ]
      },
      {
        "id": 3,
        "title": "Error Rate",
        "type": "singlestat",
        "gridPos": { "h": 4, "w": 6, "x": 0, "y": 8 },
        "targets": [
          {
            "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
            "refId": "A"
          }
        ],
        "valueName": "current",
        "format": "percent",
        "colorBackground": true,
        "thresholds": "1,5",
        "colors": ["#299c46", "#e5ac0e", "#d44a3a"]
      },
      {
        "id": 4,
        "title": "Active Users",
        "type": "singlestat",
        "gridPos": { "h": 4, "w": 6, "x": 6, "y": 8 },
        "targets": [
          {
            "expr": "active_users_total",
            "refId": "A"
          }
        ],
        "valueName": "current",
        "format": "short",
        "colorValue": true,
        "thresholds": "100,1000",
        "colors": ["#d44a3a", "#e5ac0e", "#299c46"]
      },
      {
        "id": 5,
        "title": "Memory Usage",
        "type": "graph",
        "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
        "targets": [
          {
            "expr": "process_resident_memory_bytes / 1024 / 1024",
            "legendFormat": "RSS Memory (MB)",
            "refId": "A"
          },
          {
            "expr": "nodejs_heap_size_used_bytes / 1024 / 1024",
            "legendFormat": "Heap Used (MB)",
            "refId": "B"
          },
          {
            "expr": "nodejs_heap_size_total_bytes / 1024 / 1024",
            "legendFormat": "Heap Total (MB)",
            "refId": "C"
          }
        ],
        "yaxes": [
          { "format": "short", "label": "MB", "logBase": 1, "min": 0, "max": null, "show": true },
          { "format": "short", "label": null, "logBase": 1, "min": null, "max": null, "show": true }
        ],
        "fill": 1,
        "linewidth": 2
      },
      {
        "id": 6,
        "title": "CPU Usage",
        "type": "graph",
        "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 },
        "targets": [
          {
            "expr": "rate(process_cpu_seconds_total[5m]) * 100",
            "legendFormat": "CPU Usage %",
            "refId": "A"
          }
        ],
        "yaxes": [
          { "format": "short", "label": "Percent", "logBase": 1, "min": 0, "max": 100, "show": true },
          { "format": "short", "label": null, "logBase": 1, "min": null, "max": null, "show": true }
        ]
      },
      {
        "id": 7,
        "title": "Database Queries",
        "type": "graph",
        "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 },
        "targets": [
          {
            "expr": "sum(rate(db_queries_total[5m])) by (operation)",
            "legendFormat": "{{operation}}",
            "refId": "A"
          }
        ],
        "yaxes": [
          { "format": "short", "label": "Queries/sec", "logBase": 1, "min": 0, "max": null, "show": true },
          { "format": "short", "label": null, "logBase": 1, "min": null, "max": null, "show": true }
        ]
      },
      {
        "id": 8,
        "title": "Top Endpoints by Request Count",
        "type": "table",
        "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 },
        "targets": [
          {
            "expr": "topk(10, sum(rate(http_requests_total[5m])) by (route))",
            "format": "table",
            "instant": true,
            "refId": "A"
          }
        ],
        "columns": [
          { "text": "Route", "value": "route" },
          { "text": "Requests/sec", "value": "Value" }
        ],
        "sort": {
          "col": 1,
          "desc": true
        }
      }
    ],
    "templating": {
      "list": [
        {
          "name": "instance",
          "type": "query",
          "query": "label_values(up, instance)",
          "refresh": 1,
          "includeAll": true,
          "multi": true
        },
        {
          "name": "method",
          "type": "query",
          "query": "label_values(http_requests_total, method)",
          "refresh": 1,
          "includeAll": true,
          "multi": true
        }
      ]
    },
    "annotations": {
      "list": [
        {
          "name": "Deployments",
          "datasource": "Prometheus",
          "expr": "changes(process_start_time_seconds[1h]) > 0",
          "titleFormat": "Deployment",
          "textFormat": "Application restarted"
        }
      ]
    }
  }
}
仪表板变量配置
json
{
  "templating": {
    "list": [
      {
        "name": "datasource",
        "type": "datasource",
        "query": "prometheus",
        "current": { "value": "Prometheus", "text": "Prometheus" }
      },
      {
        "name": "instance",
        "type": "query",
        "datasource": "$datasource",
        "query": "label_values(up, instance)",
        "refresh": 1,
        "includeAll": true,
        "multi": true,
        "current": { "value": "$__all", "text": "All" }
      },
      {
        "name": "interval",
        "type": "interval",
        "query": "1m,5m,10m,30m,1h",
        "current": { "value": "5m", "text": "5m" }
      }
    ]
  }
}
🎨 可视化组件详解
时间序列图表
json
{
  "type": "timeseries",
  "title": "Request Rate",
  "fieldConfig": {
    "defaults": {
      "custom": {
        "drawStyle": "line",
        "lineInterpolation": "linear",
        "barAlignment": 0,
        "lineWidth": 1,
        "fillOpacity": 10,
        "gradientMode": "none",
        "spanNulls": false,
        "insertNulls": false,
        "showPoints": "never",
        "pointSize": 5,
        "stacking": { "mode": "none", "group": "A" },
        "axisPlacement": "auto",
        "axisLabel": "",
        "scaleDistribution": { "type": "linear" },
        "hideFrom": { "legend": false, "tooltip": false, "vis": false },
        "thresholdsStyle": { "mode": "off" }
      },
      "color": { "mode": "palette-classic" },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "red", "value": 80 }
        ]
      },
      "unit": "reqps"
    }
  },
  "options": {
    "tooltip": { "mode": "multi", "sort": "desc" },
    "legend": { "displayMode": "list", "placement": "bottom" }
  },
  "targets": [
    {
      "expr": "sum(rate(http_requests_total[$interval])) by (method)",
      "legendFormat": "{{method}}",
      "refId": "A"
    }
  ]
}
单值显示
json
{
  "type": "stat",
  "title": "Current Error Rate",
  "fieldConfig": {
    "defaults": {
      "color": { "mode": "thresholds" },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "yellow", "value": 1 },
          { "color": "red", "value": 5 }
        ]
      },
      "unit": "percent"
    }
  },
  "options": {
    "reduceOptions": {
      "values": false,
      "calcs": ["lastNotNull"],
      "fields": ""
    },
    "orientation": "auto",
    "textMode": "auto",
    "colorMode": "background",
    "graphMode": "area",
    "justifyMode": "auto"
  },
  "targets": [
    {
      "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[$interval])) / sum(rate(http_requests_total[$interval])) * 100",
      "refId": "A"
    }
  ]
}
表格视图
json
{
  "type": "table",
  "title": "Top Error Routes",
  "fieldConfig": {
    "defaults": {
      "custom": { "align": "auto", "displayMode": "auto" },
      "mappings": [],
      "thresholds": {
        "mode": "absolute",
        "steps": [
          { "color": "green", "value": null },
          { "color": "red", "value": 80 }
        ]
      }
    },
    "overrides": [
      {
        "matcher": { "id": "byName", "options": "Error Rate" },
        "properties": [
          { "id": "unit", "value": "percent" },
          { "id": "custom.displayMode", "value": "color-background" },
          {
            "id": "thresholds",
            "value": {
              "mode": "absolute",
              "steps": [
                { "color": "green", "value": null },
                { "color": "yellow", "value": 1 },
                { "color": "red", "value": 5 }
              ]
            }
          }
        ]
      }
    ]
  },
  "options": {
    "showHeader": true,
    "sortBy": [
      { "desc": true, "displayName": "Error Rate" }
    ]
  },
  "targets": [
    {
      "expr": "topk(10, sum(rate(http_requests_total{status_code=~\"5..\"}[$interval])) by (route) / sum(rate(http_requests_total[$interval])) by (route) * 100)",
      "format": "table",
      "instant": true,
      "refId": "A"
    }
  ],
  "transformations": [
    {
      "id": "organize",
      "options": {
        "excludeByName": {
          "Time": true,
          "__name__": true,
          "job": true
        },
        "indexByName": {},
        "renameByName": {
          "route": "Route",
          "Value": "Error Rate"
        }
      }
    }
  ]
}
热力图
json
{
  "type": "heatmap",
  "title": "Response Time Distribution",
  "fieldConfig": {
    "defaults": {
      "custom": {
        "hideFrom": {
          "legend": false,
          "tooltip": false,
          "vis": false
        },
        "scaleDistribution": {
          "type": "linear"
        }
      }
    }
  },
  "options": {
    "calculate": false,
    "yAxis": {
      "axisPlacement": "left",
      "reverse": false,
      "unit": "s"
    },
    "cellGap": 1,
    "color": {
      "mode": "spectrum",
      "fill": "dark-orange",
      "scale": "exponential",
      "exponent": 0.5,
      "scheme": "Oranges"
    },
    "exemplars": {
      "color": "rgba(255,0,255,0.7)"
    },
    "filterValues": {
      "le": 1e-9
    },
    "legend": {
      "show": false
    },
    "rowsFrame": {
      "layout": "auto"
    },
    "tooltip": {
      "show": true,
      "yHistogram": false
    }
  },
  "targets": [
    {
      "expr": "sum(increase(http_request_duration_seconds_bucket[$interval])) by (le)",
      "format": "heatmap",
      "legendFormat": "{{le}}",
      "refId": "A"
    }
  ]
}
🚨 告警配置
告警规则配置
json
{
  "alert": {
    "name": "High Error Rate Alert",
    "conditions": [
      {
        "type": "query",
        "query": {
          "model": {
            "expr": "sum(rate(http_requests_total{status_code=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m])) * 100",
            "refId": "A"
          },
          "params": ["A", "5m", "now"]
        },
        "reducer": { "params": [], "type": "last" },
        "evaluator": { "params": [5], "type": "gt" },
        "operator": { "type": "and" }
      }
    ],
    "for": "5m",
    "frequency": "10s",
    "handler": 1,
    "executionErrorState": "alerting",
    "noDataState": "no_data",
    "notifications": [
      { "uid": "slack-notifications" },
      { "uid": "email-notifications" }
    ]
  }
}
通知渠道配置
json
{
  "name": "Slack Notifications",
  "type": "slack",
  "settings": {
    "url": "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
    "recipient": "#alerts",
    "username": "Grafana",
    "title": "{{ range .Alerts }}{{ .Labels.alertname }}{{ end }}",
    "text": "{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}",
    "icon_emoji": ":exclamation:",
    "icon_url": "",
    "mentionUsers": "",
    "mentionGroups": "",
    "mentionChannel": ""
  }
}
🔧 高级功能
数据链接配置
json
{
  "dataLinks": [
    {
      "title": "View Logs",
      "targetBlank": true,
      "url": "http://localhost:3000/explore?orgId=1&left=%5B%22now-1h%22,%22now%22,%22Loki%22,%7B%22expr%22:%22%7Binstance%3D%5C%22${__field.labels.instance}%5C%22%7D%22%7D%5D"
    },
    {
      "title": "View Metrics",
      "targetBlank": true,
      "url": "http://localhost:9090/graph?g0.expr=up%7Binstance%3D%22${__field.labels.instance}%22%7D&g0.tab=1&g0.stacked=0&g0.range_input=1h"
    }
  ]
}
转换和计算
json
{
  "transformations": [
    {
      "id": "reduce",
      "options": {
        "reducers": ["mean", "max", "min"]
      }
    },
    {
      "id": "calculateField",
      "options": {
        "alias": "Success Rate",
        "mode": "binary",
        "reduce": { "reducer": "sum" },
        "binary": {
          "left": "Value #A",
          "operator": "/",
          "right": "Value #B"
        }
      }
    },
    {
      "id": "organize",
      "options": {
        "excludeByName": { "Time": true },
        "indexByName": {},
        "renameByName": { "Value": "Success Rate %" }
      }
    }
  ]
}
自定义面板插件
javascript
// custom-panel-plugin.js
// Wraps SimplePanel in a PanelPlugin and declares three panel options:
// a text input, a select, and a boolean switch.
import { PanelPlugin } from '@grafana/data';
import { SimplePanel } from './SimplePanel';

// Choices offered by the "Display Mode" select.
const displayModeChoices = [
  { value: 'table', label: 'Table' },
  { value: 'chart', label: 'Chart' },
  { value: 'stat', label: 'Stat' },
];

/**
 * Adds the panel options to the given options builder.
 * Each call is made on the value returned by the previous one, so the result
 * is the same as a single fluent chain.
 */
function registerPanelOptions(builder) {
  const withTitle = builder.addTextInput({
    path: 'title',
    name: 'Panel Title',
    description: 'Custom title for the panel',
    defaultValue: 'My Custom Panel',
  });

  const withDisplayMode = withTitle.addSelect({
    path: 'displayMode',
    name: 'Display Mode',
    description: 'How to display the data',
    defaultValue: 'table',
    settings: {
      options: displayModeChoices,
    },
  });

  return withDisplayMode.addBooleanSwitch({
    path: 'showLegend',
    name: 'Show Legend',
    defaultValue: true,
  });
}

export const plugin = new PanelPlugin(SimplePanel).setPanelOptions(registerPanelOptions);
📊 性能优化
查询优化
yaml
# Before: high-cardinality query (one series per instance/method/route/status_code)
sum(rate(http_requests_total[5m])) by (instance, method, route, status_code)

# After: lower cardinality (aggregate away instance and route)
sum(rate(http_requests_total[5m])) by (method, status_code)

# Precompute expensive expressions with recording rules

# prometheus.yml
rule_files:
  - "recording_rules.yml"

# recording_rules.yml
groups:
  - name: http_requests
    rules:
      - record: http:request_rate_5m
        expr: sum(rate(http_requests_total[5m])) by (method, status_code)
      - record: http:error_rate_5m
        expr: sum(rate(http_requests_total{status_code=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))
仪表板优化
json
{
  "refresh": "30s",
  "time": { "from": "now-1h", "to": "now" },
  "panels": [
    {
      "maxDataPoints": 100,
      "interval": "30s",
      "cacheTimeout": "60s"
    }
  ]
}
📝 总结
Grafana为Node.js应用提供了强大的可视化能力:
- 丰富的可视化组件:图表、表格、热力图等
- 灵活的仪表板配置:支持变量、注释、链接
- 强大的告警功能:多种通知渠道和告警规则
- 可扩展性:支持插件和自定义面板
通过合理的仪表板设计和查询优化,可以构建出高效的监控可视化系统。