feat(admin/metrics): add Prometheus-compatible metrics endpoint and ops documentation
- Add /api/metrics/prometheus endpoint using prom-client (unauthenticated for scraping) - Update middleware to allow unauthenticated access to prometheus endpoint - Add /api/metrics permission routing (platform:read for GET) - Install prom-client dependency - Add metrics.md with Grafana dashboard JSON, Prometheus config, alerting rules
This commit is contained in:
parent
fb27918285
commit
27cd4ea83c
278
admin/metrics.md
Normal file
278
admin/metrics.md
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
# Admin 平台指标 — Grafana / Prometheus 配置指南
|
||||||
|
|
||||||
|
## 概述
|
||||||
|
|
||||||
|
Admin 服务暴露两个指标端点:
|
||||||
|
|
||||||
|
| 端点 | 格式 | 用途 |
|
||||||
|
|------|------|------|
|
||||||
|
| `GET /api/metrics` | JSON | 前端页面 / 人工查看 / API 消费 |
|
||||||
|
| `GET /api/metrics/prometheus` | Prometheus Text | Prometheus 采集 |
|
||||||
|
|
||||||
|
Prometheus 端点 **无需认证**,可直接 scrape。
|
||||||
|
|
||||||
|
## 采集的指标
|
||||||
|
|
||||||
|
所有指标通过 `platform_entity_count` Gauge 暴露,带 `entity` 和 `window` 两个 label:
|
||||||
|
|
||||||
|
```
|
||||||
|
# HELP platform_entity_count Platform entity counts by time window
|
||||||
|
# TYPE platform_entity_count gauge
|
||||||
|
platform_entity_count{entity="users",window="total"} 1000
|
||||||
|
platform_entity_count{entity="users",window="27h"} 5
|
||||||
|
platform_entity_count{entity="users",window="7d"} 32
|
||||||
|
platform_entity_count{entity="users",window="30d"} 150
|
||||||
|
platform_entity_count{entity="workspaces",window="total"} 50
|
||||||
|
platform_entity_count{entity="workspaces",window="27h"} 1
|
||||||
|
...
|
||||||
|
platform_entity_count{entity="skills",window="30d"} 45
|
||||||
|
```
|
||||||
|
|
||||||
|
Entity 列表:`users`、`workspaces`、`projects`、`repos`、`rooms`、`skills`
|
||||||
|
|
||||||
|
Window 列表:`total`(累计)、`27h`(近27小时)、`7d`(近7天)、`30d`(近30天)
|
||||||
|
|
||||||
|
## Prometheus 配置
|
||||||
|
|
||||||
|
### prometheus.yml
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
scrape_configs:
|
||||||
|
- job_name: 'admin-metrics'
|
||||||
|
scrape_interval: 60s
|
||||||
|
metrics_path: '/api/metrics/prometheus'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['<admin-host>:<port>']
|
||||||
|
labels:
|
||||||
|
env: 'production'
|
||||||
|
service: 'admin'
|
||||||
|
```
|
||||||
|
|
||||||
|
### K8s ServiceMonitor(如果用 prometheus-operator)
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
apiVersion: monitoring.coreos.com/v1
|
||||||
|
kind: ServiceMonitor
|
||||||
|
metadata:
|
||||||
|
name: admin-metrics
|
||||||
|
namespace: monitoring
|
||||||
|
labels:
|
||||||
|
release: prometheus
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app: admin
|
||||||
|
endpoints:
|
||||||
|
- port: http
|
||||||
|
path: /api/metrics/prometheus
|
||||||
|
interval: 60s
|
||||||
|
```
|
||||||
|
|
||||||
|
## Grafana Dashboard
|
||||||
|
|
||||||
|
### 推荐 Panel 配置
|
||||||
|
|
||||||
|
#### Panel 1: 实体总量(Stat Panel)
|
||||||
|
|
||||||
|
```
|
||||||
|
Query:
|
||||||
|
platform_entity_count{window="total"}
|
||||||
|
|
||||||
|
Visualization: Stat
|
||||||
|
- Show: Value
|
||||||
|
- Color mode: Background
|
||||||
|
- Thresholds: 按实际业务设定
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Panel 2: 27 小时增长趋势(Time Series / Bar Gauge)
|
||||||
|
|
||||||
|
```
|
||||||
|
Query:
|
||||||
|
platform_entity_count{window="27h"}
|
||||||
|
|
||||||
|
Visualization: Bar Gauge
|
||||||
|
- Display: Basic
|
||||||
|
- Show: Value
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Panel 3: 7 天 / 30 天对比(Bar Chart)
|
||||||
|
|
||||||
|
```
|
||||||
|
Query:
|
||||||
|
platform_entity_count{window=~"7d|30d"}
|
||||||
|
|
||||||
|
Visualization: Bar Chart
|
||||||
|
- Group by: entity
|
||||||
|
- Bar mode: grouped
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Panel 4: 指标汇总表(Table Panel,包含全部时间窗口)
|
||||||
|
|
||||||
|
```
|
||||||
|
Query:
|
||||||
|
platform_entity_count
|
||||||
|
|
||||||
|
Transform:
|
||||||
|
1. Labels to fields
|
||||||
|
2. Pivot by entity
|
||||||
|
3. Organize fields
|
||||||
|
|
||||||
|
Visualization: Table
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dashboard JSON 模板
|
||||||
|
|
||||||
|
将以下 JSON 导入 Grafana。该模板是 HTTP API(`POST /api/dashboards/db`)的请求体格式;若通过界面导入(Dashboard → Import → Paste JSON),请只粘贴 `dashboard` 字段内的对象:
|
||||||
|
|
||||||
|
> 注:模板中未设置 `uid` 和 `datasource`,导入前请根据实际 Prometheus 数据源补充这两个字段。
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"dashboard": {
|
||||||
|
"title": "Admin 平台指标",
|
||||||
|
"tags": ["admin", "platform"],
|
||||||
|
"timezone": "browser",
|
||||||
|
"panels": [
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"title": "实体总量",
|
||||||
|
"type": "stat",
|
||||||
|
"gridPos": { "h": 4, "w": 24, "x": 0, "y": 0 },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "platform_entity_count{window=\"total\"}",
|
||||||
|
"legendFormat": "{{entity}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"colorMode": "background",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto"
|
||||||
|
},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"thresholds": {
|
||||||
|
"mode": "absolute",
|
||||||
|
"steps": [
|
||||||
|
{ "color": "green", "value": null },
|
||||||
|
{ "color": "yellow", "value": 100 },
|
||||||
|
{ "color": "red", "value": 1000 }
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"title": "近 27 小时新增",
|
||||||
|
"type": "bargauge",
|
||||||
|
"gridPos": { "h": 6, "w": 12, "x": 0, "y": 4 },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "platform_entity_count{window=\"27h\"}",
|
||||||
|
"legendFormat": "{{entity}}"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"displayMode": "gradient",
|
||||||
|
"orientation": "horizontal"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"title": "近 7 天 / 30 天对比",
|
||||||
|
"type": "barchart",
|
||||||
|
"gridPos": { "h": 6, "w": 12, "x": 12, "y": 4 },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "platform_entity_count{window=~\"7d|30d\"}",
|
||||||
|
"legendFormat": "{{entity}} ({{window}})"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"barRadius": 0.05,
|
||||||
|
"groupWidth": 0.7,
|
||||||
|
"orientation": "auto"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"title": "指标汇总表",
|
||||||
|
"type": "table",
|
||||||
|
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 10 },
|
||||||
|
"targets": [
|
||||||
|
{
|
||||||
|
"expr": "platform_entity_count",
|
||||||
|
"format": "table",
|
||||||
|
"instant": true
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"transformations": [
|
||||||
|
{ "id": "labelsToFields", "options": {} },
|
||||||
|
{
|
||||||
|
"id": "organize",
|
||||||
|
"options": {
|
||||||
|
"excludeByName": { "Time": true, "__name__": true },
|
||||||
|
"indexByName": { "entity": 0, "window": 1, "Value": 2 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"options": {
|
||||||
|
"showHeader": true,
|
||||||
|
"sortBy": [{ "desc": false, "displayName": "entity" }]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"time": { "from": "now-24h", "to": "now" },
|
||||||
|
"refresh": "1m"
|
||||||
|
},
|
||||||
|
"overwrite": true
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## 告警规则(可选)
|
||||||
|
|
||||||
|
### prometheus rules
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
groups:
|
||||||
|
- name: admin-entity-growth
|
||||||
|
rules:
|
||||||
|
# 27 小时内用户增长超过 100 告警
|
||||||
|
- alert: HighUserGrowth27h
|
||||||
|
expr: platform_entity_count{entity="users", window="27h"} > 100
|
||||||
|
for: 5m
|
||||||
|
labels:
|
||||||
|
severity: warning
|
||||||
|
annotations:
|
||||||
|
summary: "27 小时内新增用户 {{ $value }} 超过阈值"
|
||||||
|
|
||||||
|
# 仓库 7 天零增长告警
|
||||||
|
- alert: NoRepoGrowth7d
|
||||||
|
expr: platform_entity_count{entity="repos", window="7d"} == 0
|
||||||
|
and on() platform_entity_count{entity="repos", window="total"} > 0
|
||||||
|
for: 1h
|
||||||
|
labels:
|
||||||
|
severity: info
|
||||||
|
annotations:
|
||||||
|
summary: "近 7 天无新增仓库"
|
||||||
|
```
|
||||||
|
|
||||||
|
## 验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. JSON 格式
|
||||||
|
curl http://localhost:3000/api/metrics | jq .
|
||||||
|
|
||||||
|
# 2. Prometheus 格式
|
||||||
|
curl http://localhost:3000/api/metrics/prometheus
|
||||||
|
|
||||||
|
# 预期输出:
|
||||||
|
# HELP platform_entity_count Platform entity counts by time window
|
||||||
|
# TYPE platform_entity_count gauge
|
||||||
|
platform_entity_count{entity="users",window="27h"} 0
|
||||||
|
platform_entity_count{entity="users",window="30d"} 0
|
||||||
|
platform_entity_count{entity="users",window="7d"} 0
|
||||||
|
platform_entity_count{entity="users",window="total"} 5
|
||||||
|
...
|
||||||
|
```
|
||||||
38
admin/package-lock.json
generated
38
admin/package-lock.json
generated
@ -21,6 +21,7 @@
|
|||||||
"next": "16.2.4",
|
"next": "16.2.4",
|
||||||
"node-cron": "^4.2.1",
|
"node-cron": "^4.2.1",
|
||||||
"pg": "^8.11.3",
|
"pg": "^8.11.3",
|
||||||
|
"prom-client": "^15.1.3",
|
||||||
"react": "19.2.4",
|
"react": "19.2.4",
|
||||||
"react-dom": "19.2.4",
|
"react-dom": "19.2.4",
|
||||||
"tailwind-merge": "^2.2.0",
|
"tailwind-merge": "^2.2.0",
|
||||||
@ -1677,6 +1678,15 @@
|
|||||||
"node": ">=12.4.0"
|
"node": ">=12.4.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/@opentelemetry/api": {
|
||||||
|
"version": "1.9.1",
|
||||||
|
"resolved": "https://registry.npmmirror.com/@opentelemetry/api/-/api-1.9.1.tgz",
|
||||||
|
"integrity": "sha512-gLyJlPHPZYdAk1JENA9LeHejZe1Ti77/pTeFm/nMXmQH/HFZlcS/O2XJB+L8fkbrNSqhdtlvjBVjxwUYanNH5Q==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=8.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/@phc/format": {
|
"node_modules/@phc/format": {
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"resolved": "https://registry.npmmirror.com/@phc/format/-/format-1.0.0.tgz",
|
"resolved": "https://registry.npmmirror.com/@phc/format/-/format-1.0.0.tgz",
|
||||||
@ -2964,6 +2974,12 @@
|
|||||||
"node": ">= 10.0.0"
|
"node": ">= 10.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/bintrees": {
|
||||||
|
"version": "1.0.2",
|
||||||
|
"resolved": "https://registry.npmmirror.com/bintrees/-/bintrees-1.0.2.tgz",
|
||||||
|
"integrity": "sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/brace-expansion": {
|
"node_modules/brace-expansion": {
|
||||||
"version": "1.1.14",
|
"version": "1.1.14",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
@ -6082,6 +6098,19 @@
|
|||||||
"node": ">= 0.8.0"
|
"node": ">= 0.8.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/prom-client": {
|
||||||
|
"version": "15.1.3",
|
||||||
|
"resolved": "https://registry.npmmirror.com/prom-client/-/prom-client-15.1.3.tgz",
|
||||||
|
"integrity": "sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"dependencies": {
|
||||||
|
"@opentelemetry/api": "^1.4.0",
|
||||||
|
"tdigest": "^0.1.1"
|
||||||
|
},
|
||||||
|
"engines": {
|
||||||
|
"node": "^16 || ^18 || >=20"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/prop-types": {
|
"node_modules/prop-types": {
|
||||||
"version": "15.8.1",
|
"version": "15.8.1",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
@ -6835,6 +6864,15 @@
|
|||||||
"version": "4.0.0",
|
"version": "4.0.0",
|
||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
|
"node_modules/tdigest": {
|
||||||
|
"version": "0.1.2",
|
||||||
|
"resolved": "https://registry.npmmirror.com/tdigest/-/tdigest-0.1.2.tgz",
|
||||||
|
"integrity": "sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==",
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"bintrees": "1.0.2"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/tinyglobby": {
|
"node_modules/tinyglobby": {
|
||||||
"version": "0.2.16",
|
"version": "0.2.16",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
|
|||||||
@ -26,6 +26,7 @@
|
|||||||
"next": "16.2.4",
|
"next": "16.2.4",
|
||||||
"node-cron": "^4.2.1",
|
"node-cron": "^4.2.1",
|
||||||
"pg": "^8.11.3",
|
"pg": "^8.11.3",
|
||||||
|
"prom-client": "^15.1.3",
|
||||||
"react": "19.2.4",
|
"react": "19.2.4",
|
||||||
"react-dom": "19.2.4",
|
"react-dom": "19.2.4",
|
||||||
"tailwind-merge": "^2.2.0",
|
"tailwind-merge": "^2.2.0",
|
||||||
|
|||||||
74
admin/src/app/api/metrics/prometheus/route.ts
Normal file
74
admin/src/app/api/metrics/prometheus/route.ts
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
import { NextResponse } from "next/server";
|
||||||
|
import { Registry, Gauge } from "prom-client";
|
||||||
|
import { query } from "@/lib/db";
|
||||||
|
|
||||||
|
export const runtime = "nodejs";
|
||||||
|
|
||||||
|
/**
 * Serves platform entity counts in the Prometheus text exposition format.
 *
 * Each request builds a fresh registry holding a single
 * `platform_entity_count` gauge labelled by `entity` and `window`, fills it
 * from one aggregate query per entity table, and responds with the
 * serialized registry using the registry's own content type.
 *
 * Example scrape job for prometheus.yml:
 *   - job_name: 'admin-metrics'
 *     scrape_interval: 60s
 *     metrics_path: '/api/metrics/prometheus'
 *     static_configs:
 *       - targets: ['admin:3000']
 *
 * @returns A response whose body is the serialized registry.
 */
export async function GET() {
  // A registry scoped to this request: the gauge is registered here only.
  const registry = new Registry();

  const entityGauge = new Gauge({
    name: "platform_entity_count",
    help: "Platform entity counts by time window",
    labelNames: ["entity", "window"] as const,
    registers: [registry],
  });

  // [entity label, table name] pairs; the users table name is passed
  // double-quoted into the SQL text.
  const entityTables: ReadonlyArray<readonly [string, string]> = [
    ["users", '"user"'],
    ["workspaces", "workspace"],
    ["projects", "project"],
    ["repos", "repo"],
    ["rooms", "room"],
    ["skills", "project_skill"],
  ];

  // Row shape of the aggregate query; counts are typed as strings here.
  type CountRow = {
    total: string;
    new_27h: string;
    new_7d: string;
    new_30d: string;
  };

  const toInt = (raw: string): number => parseInt(raw, 10);

  // All tables are queried concurrently; each query yields the total plus
  // the three rolling-window counts in a single row.
  const counts = await Promise.all(
    entityTables.map(async ([entity, table]) => {
      const { rows } = await query<CountRow>(
        `SELECT
          COUNT(*) AS total,
          COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '27 hours') AS new_27h,
          COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '7 days') AS new_7d,
          COUNT(*) FILTER (WHERE created_at >= NOW() - INTERVAL '30 days') AS new_30d
        FROM ${table}`
      );
      const first = rows[0];
      return {
        entity,
        total: toInt(first.total),
        last27h: toInt(first.new_27h),
        last7d: toInt(first.new_7d),
        last30d: toInt(first.new_30d),
      };
    })
  );

  for (const entry of counts) {
    // One sample per window label, written in a fixed order.
    const samples: ReadonlyArray<readonly [string, number]> = [
      ["total", entry.total],
      ["27h", entry.last27h],
      ["7d", entry.last7d],
      ["30d", entry.last30d],
    ];
    for (const [windowLabel, value] of samples) {
      entityGauge.set({ entity: entry.entity, window: windowLabel }, value);
    }
  }

  const body = await registry.metrics();
  const headers = { "Content-Type": registry.contentType };

  return new NextResponse(body, { headers });
}
|
||||||
@ -12,7 +12,7 @@ import { createHash } from "crypto";
|
|||||||
import { parseSessionCookie, loadAdminSession, canAccess } from "@/lib/auth";
|
import { parseSessionCookie, loadAdminSession, canAccess } from "@/lib/auth";
|
||||||
import { query } from "@/lib/db";
|
import { query } from "@/lib/db";
|
||||||
|
|
||||||
const PUBLIC_PATHS = ["/login", "/api/auth/login", "/api/auth/oidc", "/api/health"];
|
const PUBLIC_PATHS = ["/login", "/api/auth/login", "/api/auth/oidc", "/api/health", "/api/metrics/prometheus"];
|
||||||
const PROTECTED_PATHS = ["/dashboard", "/admin", "/platform"];
|
const PROTECTED_PATHS = ["/dashboard", "/admin", "/platform"];
|
||||||
|
|
||||||
function getRequiredPermission(path: string, method: string): string | null {
|
function getRequiredPermission(path: string, method: string): string | null {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user