健康检查 Health Check
阅读时间约 8 分钟,全文约 2529 字
健康检查 Health Check
简介
ASP.NET Core 内置的健康检查中间件提供应用健康状态监控端点。它可以检查数据库连接、外部服务、磁盘空间、自定义逻辑等,配合 Kubernetes、Docker、负载均衡器等实现自动故障转移和健康探针。
特点
基本用法
注册健康检查
/// <summary>
/// Minimal health check setup: registers the health check services and
/// exposes a single endpoint.
/// </summary>
var builder = WebApplication.CreateBuilder(args);
// Register the health check services.
builder.Services.AddHealthChecks();
var app = builder.Build();
// Map the health check endpoint.
app.MapHealthChecks("/health");
app.Run();
// A request to /health responds with "Healthy".
添加数据库检查
/// <summary>
/// Registers health checks for the external services the application depends on.
/// Connection settings are read from configuration; each check gets a name and tags.
/// </summary>
builder.Services.AddHealthChecks()
    // SQL Server
    .AddSqlServer(
        connectionString: builder.Configuration.GetConnectionString("Default")!,
        name: "sqlserver",
        timeout: TimeSpan.FromSeconds(5),
        tags: new[] { "database", "sql" })
    // Redis
    .AddRedis(
        builder.Configuration.GetConnectionString("Redis")!,
        name: "redis",
        timeout: TimeSpan.FromSeconds(3),
        tags: new[] { "cache", "redis" })
    // RabbitMQ
    .AddRabbitMQ(
        builder.Configuration.GetConnectionString("RabbitMQ")!,
        name: "rabbitmq",
        tags: new[] { "mq" })
    // MongoDB
    .AddMongoDb(
        builder.Configuration.GetConnectionString("MongoDB")!,
        name: "mongodb",
        tags: new[] { "database", "nosql" })
    // Elasticsearch
    .AddElasticsearch(
        builder.Configuration["Elasticsearch:Url"]!,
        name: "elasticsearch",
        tags: new[] { "search" });
NuGet 包
# 常用健康检查扩展包
dotnet add package AspNetCore.HealthChecks.SqlServer
dotnet add package AspNetCore.HealthChecks.Redis
dotnet add package AspNetCore.HealthChecks.RabbitMQ
dotnet add package AspNetCore.HealthChecks.MongoDb
dotnet add package AspNetCore.HealthChecks.Elasticsearch
dotnet add package AspNetCore.HealthChecks.UI自定义健康检查
实现 IHealthCheck
/// <summary>
/// Custom health check that probes an external API by sending a GET request to
/// <c>/api/ping</c> through the named <c>"ExternalApi"</c> HTTP client.
/// </summary>
public class ExternalApiHealthCheck : IHealthCheck
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly ILogger<ExternalApiHealthCheck> _logger;

    /// <param name="httpClientFactory">Factory used to obtain the <c>"ExternalApi"</c> client.</param>
    /// <param name="logger">Logger that records probe failures.</param>
    public ExternalApiHealthCheck(IHttpClientFactory httpClientFactory, ILogger<ExternalApiHealthCheck> logger)
    {
        _httpClientFactory = httpClientFactory;
        _logger = logger;
    }

    /// <summary>
    /// Returns Healthy on a success status code, Degraded on any other status code,
    /// and Unhealthy when the request itself fails.
    /// </summary>
    /// <param name="context">Health check context supplied by the health check service.</param>
    /// <param name="ct">Token that cancels the probe.</param>
    /// <exception cref="OperationCanceledException">
    /// Propagated when <paramref name="ct"/> is cancelled, so that a cancelled probe
    /// is not logged and reported as an outage of the external API.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken ct = default)
    {
        try
        {
            var client = _httpClientFactory.CreateClient("ExternalApi");

            // Dispose the response so its content stream and connection are released promptly.
            using var response = await client.GetAsync("/api/ping", ct);

            if (response.IsSuccessStatusCode)
            {
                return HealthCheckResult.Healthy("外部 API 正常");
            }

            return HealthCheckResult.Degraded($"外部 API 返回异常状态码:{response.StatusCode}");
        }
        // The filter lets caller-requested cancellation bubble up; HttpClient's own
        // timeout (ct not cancelled) is still treated as a failure of the API.
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            _logger.LogError(ex, "外部 API 健康检查失败");
            return HealthCheckResult.Unhealthy("外部 API 不可用", ex);
        }
    }
}
// Register the custom check under the name "external_api" with the "external" tag.
builder.Services.AddHealthChecks()
    .AddCheck<ExternalApiHealthCheck>("external_api", tags: new[] { "external" });
磁盘空间检查
/// <summary>
/// Health check that compares the free space on the drive containing the
/// application's base directory against a minimum number of bytes.
/// </summary>
public class DiskSpaceHealthCheck : IHealthCheck
{
    private readonly long _minimumFreeBytes;

    /// <param name="minimumFreeBytes">Minimum free space in bytes; defaults to 1 GB.</param>
    public DiskSpaceHealthCheck(long minimumFreeBytes = 1024 * 1024 * 1024)
        => _minimumFreeBytes = minimumFreeBytes;

    /// <summary>
    /// Reports Unhealthy when the available free space is below the configured
    /// minimum, otherwise Healthy. Both descriptions include free and total space in GB.
    /// </summary>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken ct = default)
    {
        const double BytesPerGigabyte = 1024.0 * 1024 * 1024;

        var rootPath = Path.GetPathRoot(AppContext.BaseDirectory)!;
        var volume = new DriveInfo(rootPath);

        var availableGb = volume.AvailableFreeSpace / BytesPerGigabyte;
        var capacityGb = volume.TotalSize / BytesPerGigabyte;

        var hasEnoughSpace = volume.AvailableFreeSpace >= _minimumFreeBytes;
        var result = hasEnoughSpace
            ? HealthCheckResult.Healthy($"磁盘空间充足:可用 {availableGb:F1}GB / 总计 {capacityGb:F1}GB")
            : HealthCheckResult.Unhealthy($"磁盘空间不足:可用 {availableGb:F1}GB / 总计 {capacityGb:F1}GB");

        return Task.FromResult(result);
    }
}
内存使用检查
/// <summary>
/// Health check that compares the managed memory reported by
/// <see cref="GC.GetTotalMemory(bool)"/> against a byte threshold.
/// </summary>
public class MemoryHealthCheck : IHealthCheck
{
    private readonly long _thresholdBytes;

    /// <param name="thresholdBytes">Usage threshold in bytes; defaults to 1 GB.</param>
    public MemoryHealthCheck(long thresholdBytes = 1024 * 1024 * 1024)
        => _thresholdBytes = thresholdBytes;

    /// <summary>
    /// Reports Degraded when usage exceeds the threshold, otherwise Healthy.
    /// The description carries the usage in whole megabytes.
    /// </summary>
    public Task<HealthCheckResult> CheckHealthAsync(HealthCheckContext context, CancellationToken ct = default)
    {
        const double BytesPerMegabyte = 1024.0 * 1024;

        // forceFullCollection: false — read the current figure without forcing a collection.
        long allocatedBytes = GC.GetTotalMemory(false);
        double allocatedMb = allocatedBytes / BytesPerMegabyte;

        var withinLimit = allocatedBytes <= _thresholdBytes;
        var result = withinLimit
            ? HealthCheckResult.Healthy($"内存使用正常:{allocatedMb:F0}MB")
            : HealthCheckResult.Degraded($"内存使用偏高:{allocatedMb:F0}MB");

        return Task.FromResult(result);
    }
}
分组与标签
Liveness vs Readiness
/// <summary>
/// Kubernetes-style setup: liveness and readiness checks are separated by tag
/// and exposed on two different endpoints.
/// </summary>
builder.Services.AddHealthChecks()
    // Liveness check — always returns Healthy, so it only shows that the process responds.
    .AddCheck("self", () => HealthCheckResult.Healthy(), tags: new[] { "live" })
    // Readiness checks — the dependencies tagged "ready".
    .AddSqlServer(connectionString, tags: new[] { "ready" })
    .AddRedis(redisConnection, tags: new[] { "ready" })
    .AddCheck<ExternalApiHealthCheck>("external_api", tags: new[] { "ready" });
var app = builder.Build();
// K8s liveness probe endpoint — runs only the checks tagged "live".
app.MapHealthChecks("/health/live", new HealthCheckOptions
{
    Predicate = check => check.Tags.Contains("live"),
    ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});
// K8s readiness probe endpoint — runs only the checks tagged "ready".
app.MapHealthChecks("/health/ready", new HealthCheckOptions
{
    Predicate = check => check.Tags.Contains("ready"),
    ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});
Health Check UI
可视化仪表盘
/// <summary>
/// Adds the Health Checks UI dashboard and the detailed endpoint it polls.
/// </summary>
// Install the package first:
// dotnet add package AspNetCore.HealthChecks.UI
builder.Services.AddHealthChecksUI(settings =>
{
    settings.AddHealthCheckEndpoint("API", "/health/detail");
    settings.SetEvaluationTimeInSeconds(10); // evaluate every 10 seconds
    settings.SetMinimumSecondsBetweenFailureNotifications(60);
    settings.SetHeaderText("系统健康监控");
})
.AddInMemoryDatabase(); // in-memory storage (can be swapped for a database in production)
var app = builder.Build();
// Detailed health check endpoint (JSON format)
app.MapHealthChecks("/health/detail", new HealthCheckOptions
{
    ResponseWriter = UIResponseWriter.WriteHealthCheckUIResponse
});
// UI dashboard
app.MapHealthChecksUI(options =>
{
    options.UIPath = "/health-ui"; // path of the UI
    options.ApiPath = "/health-ui-api"; // path of the UI's API
    options.UseRelativeApiPath = false;
});
// Browse to /health-ui to see the dashboard.
自定义响应格式
/// <summary>
/// Custom response writer that serializes a <c>HealthReport</c> as indented JSON.
/// </summary>
public static class CustomHealthResponseWriter
{
    // Shared across requests: building a JsonSerializerOptions per call throws away
    // the serializer's cached type metadata and forces it to be rebuilt every time.
    private static readonly JsonSerializerOptions SerializerOptions = new() { WriteIndented = true };

    /// <summary>
    /// Writes the overall status, total duration and one entry per check
    /// (name, status, description, duration, tags, exception message) to the response.
    /// </summary>
    /// <param name="context">HTTP context whose response receives the JSON.</param>
    /// <param name="report">Health report produced by the health check service.</param>
    /// <returns>The task returned by the response write.</returns>
    public static Task WriteResponse(HttpContext context, HealthReport report)
    {
        context.Response.ContentType = "application/json";

        var response = new
        {
            Status = report.Status.ToString(),
            Duration = report.TotalDuration,
            Checks = report.Entries.Select(e => new
            {
                Name = e.Key,
                Status = e.Value.Status.ToString(),
                Description = e.Value.Description,
                Duration = e.Value.Duration,
                Tags = e.Value.Tags,
                // Only the message is exposed, not the full exception.
                Exception = e.Value.Exception?.Message
            })
        };

        var json = JsonSerializer.Serialize(response, SerializerOptions);
        return context.Response.WriteAsync(json);
    }
}
// Use the custom response writer for the /health endpoint.
app.MapHealthChecks("/health", new HealthCheckOptions
{
    ResponseWriter = CustomHealthResponseWriter.WriteResponse
});
Kubernetes 集成
# Health probe configuration inside a K8s Deployment (excerpt)
apiVersion: apps/v1
kind: Deployment
spec:
  template:
    spec:
      containers:
        - name: api
          image: myapp:latest
          ports:
            - containerPort: 8080
          # Liveness probe — the container is restarted when it fails
          livenessProbe:
            httpGet:
              path: /health/live
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
          # Readiness probe — traffic is removed from the pod when it fails
          readinessProbe:
            httpGet:
              path: /health/ready
              port: 8080
            initialDelaySeconds: 10
            periodSeconds: 5
            timeoutSeconds: 3
            failureThreshold: 3
          # Startup probe — detects when the application has started
          startupProbe:
            httpGet:
              path: /health/live
              port: 8080
            failureThreshold: 30
            periodSeconds: 10
健康检查最佳实践
超时与性能优化
/// <summary>
/// Decorator that bounds the running time of another health check so that a
/// slow check cannot stall the whole health endpoint.
/// </summary>
public class TimeoutHealthCheckDecorator : IHealthCheck
{
    private readonly IHealthCheck _inner;
    private readonly TimeSpan _timeout;

    /// <param name="inner">The health check being wrapped.</param>
    /// <param name="timeout">Maximum time the wrapped check may run.</param>
    public TimeoutHealthCheckDecorator(IHealthCheck inner, TimeSpan timeout)
        => (_inner, _timeout) = (inner, timeout);

    /// <summary>
    /// Runs the wrapped check with a token that is cancelled after the timeout.
    /// Returns Degraded when the cancellation was not requested through
    /// <paramref name="ct"/>; cancellation requested through it propagates.
    /// </summary>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context, CancellationToken ct = default)
    {
        using (var timeoutSource = CancellationTokenSource.CreateLinkedTokenSource(ct))
        {
            timeoutSource.CancelAfter(_timeout);

            try
            {
                var outcome = await _inner.CheckHealthAsync(context, timeoutSource.Token);
                return outcome;
            }
            catch (OperationCanceledException) when (!ct.IsCancellationRequested)
            {
                return HealthCheckResult.Degraded($"健康检查超时 ({_timeout.TotalSeconds}s)");
            }
        }
    }
}
// Register a health check with a 3-second timeout, tagged "ready".
builder.Services.AddHealthChecks()
    .AddCheck<ExternalApiHealthCheck>("external_api",
        timeout: TimeSpan.FromSeconds(3),
        tags: new[] { "ready" });
健康检查缓存
/// <summary>
/// Decorator that caches the result of another health check for a fixed
/// duration, so an expensive check is not executed on every probe.
/// </summary>
public class CachedHealthCheck : IHealthCheck, IDisposable
{
    private readonly IHealthCheck _inner;
    private readonly TimeSpan _cacheDuration;
    private readonly object _lock = new();
    private HealthCheckResult? _cachedResult;
    private DateTime _lastChecked = DateTime.MinValue;

    /// <param name="inner">The health check whose results are cached.</param>
    /// <param name="cacheDuration">How long a result is served before the inner check runs again.</param>
    public CachedHealthCheck(IHealthCheck inner, TimeSpan cacheDuration)
    {
        _inner = inner;
        _cacheDuration = cacheDuration;
    }

    /// <summary>
    /// Returns the cached result while it is still fresh; otherwise runs the
    /// inner check, stores its result and returns it.
    /// </summary>
    /// <remarks>
    /// Exceptions thrown by the inner check propagate unchanged and are not cached.
    /// Awaiting the inner check (instead of <c>ContinueWith</c> plus <c>Task.Result</c>)
    /// avoids wrapping failures in <see cref="AggregateException"/> and avoids
    /// discarding an already-computed result when the token is cancelled afterwards.
    /// </remarks>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context, CancellationToken ct = default)
    {
        lock (_lock)
        {
            if (_cachedResult.HasValue && DateTime.UtcNow - _lastChecked < _cacheDuration)
            {
                return _cachedResult.Value;
            }
        }

        // Run the real check outside the lock; a lock cannot be held across an await.
        var result = await _inner.CheckHealthAsync(context, ct);

        lock (_lock)
        {
            _cachedResult = result;
            _lastChecked = DateTime.UtcNow;
        }

        return result;
    }

    /// <summary>Disposes the inner check when it is disposable.</summary>
    public void Dispose() => (_inner as IDisposable)?.Dispose();
}
// Usage: cache the database check result for 30 seconds.
// The decorator is created once and registered as an instance so that its cache
// survives between executions; AddCheck has no overload that accepts a
// service-provider factory lambda, and a per-execution factory would start
// with an empty cache on every run.
// NOTE(review): the SqlServerHealthCheck constructor signature depends on the
// AspNetCore.HealthChecks.SqlServer package version — confirm before use.
var cachedSqlServerCheck = new CachedHealthCheck(
    new SqlServerHealthCheck(connectionString),
    TimeSpan.FromSeconds(30));
builder.Services.AddHealthChecks()
    .AddCheck("sqlserver-cached", cachedSqlServerCheck, tags: new[] { "ready" });
Docker 健康检查集成
# Health check configuration in the Dockerfile
FROM mcr.microsoft.com/dotnet/aspnet:8.0
# The aspnet runtime image does not ship curl, which the HEALTHCHECK below relies on;
# without it the check fails on every run and the container is marked unhealthy.
RUN apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY publish/ .
ENTRYPOINT ["dotnet", "MyApp.dll"]
# Docker's built-in health check (complements the K8s probes)
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD curl -f http://localhost:8080/health/live || exit 1
# Docker Compose health check
services:
  api:
    build: .
    ports:
      - "8080:8080"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health/live"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s
  # A service that waits for the api service to become healthy
  worker:
    build: .
    depends_on:
      api:
        condition: service_healthy
多服务聚合健康检查
/// <summary>
/// Health check that aggregates the health of several downstream services by
/// requesting <c>{service}/health</c> on each of them through the named
/// <c>"HealthCheck"</c> HTTP client.
/// </summary>
public class AggregateHealthCheck : IHealthCheck
{
    private readonly IHttpClientFactory _httpClientFactory;
    private readonly string[] _downstreamServices;

    /// <param name="httpClientFactory">Factory used to obtain the <c>"HealthCheck"</c> client.</param>
    /// <param name="downstreamServices">Base addresses of the services to probe.</param>
    public AggregateHealthCheck(
        IHttpClientFactory httpClientFactory,
        string[] downstreamServices)
    {
        _httpClientFactory = httpClientFactory;
        _downstreamServices = downstreamServices;
    }

    /// <summary>
    /// Probes every downstream service and returns the worst status found:
    /// Unhealthy if any probe failed, otherwise Degraded if any service answered
    /// with a non-success status code, otherwise Healthy. The result data maps
    /// each service to the description of its individual result.
    /// </summary>
    /// <exception cref="OperationCanceledException">
    /// Propagated when <paramref name="ct"/> is cancelled.
    /// </exception>
    public async Task<HealthCheckResult> CheckHealthAsync(
        HealthCheckContext context, CancellationToken ct = default)
    {
        var client = _httpClientFactory.CreateClient("HealthCheck");

        // Probe all services concurrently so the total time is bounded by the
        // slowest service rather than by the sum of all of them.
        var probes = _downstreamServices
            .Select(service => ProbeAsync(client, service, ct))
            .ToArray();
        var outcomes = await Task.WhenAll(probes);

        // Indexer assignment keeps the last result when a service is listed twice.
        var results = new Dictionary<string, HealthCheckResult>();
        for (var i = 0; i < _downstreamServices.Length; i++)
        {
            results[_downstreamServices[i]] = outcomes[i];
        }

        var hasUnhealthy = results.Any(r => r.Value.Status == HealthStatus.Unhealthy);
        var hasDegraded = results.Any(r => r.Value.Status == HealthStatus.Degraded);
        var overallStatus = hasUnhealthy ? HealthStatus.Unhealthy
            : hasDegraded ? HealthStatus.Degraded
            : HealthStatus.Healthy;

        // Every result created by ProbeAsync carries a description.
        var data = results.ToDictionary(
            r => r.Key,
            r => (object)r.Value.Description!);

        return new HealthCheckResult(overallStatus,
            $"检查了 {results.Count} 个服务", null, data);
    }

    // Probes one downstream service and maps the response to a health result.
    private static async Task<HealthCheckResult> ProbeAsync(
        HttpClient client, string service, CancellationToken ct)
    {
        try
        {
            // Dispose the response so its connection is released promptly.
            using var response = await client.GetAsync($"{service}/health", ct);

            return response.IsSuccessStatusCode
                ? HealthCheckResult.Healthy("正常")
                : HealthCheckResult.Degraded($"状态码: {response.StatusCode}");
        }
        // Caller-requested cancellation bubbles up instead of being reported
        // as an outage of the downstream service.
        catch (Exception ex) when (!ct.IsCancellationRequested)
        {
            return HealthCheckResult.Unhealthy($"不可用: {ex.Message}");
        }
    }
}
健康状态说明
| 状态 | 含义 | HTTP 状态码 |
|---|---|---|
| Healthy | 所有检查通过 | 200 |
| Degraded | 降级但可用 | 200 |
| Unhealthy | 检查失败 | 503 |
优点
缺点
总结
健康检查是生产环境必备的监控手段。存活探针检测进程状态,就绪探针检测依赖服务。K8s 环境下务必配置 livenessProbe 和 readinessProbe,配合标签过滤实现精细化健康管理。
关键知识点
- 先分清这个主题位于请求链路、后台任务链路还是基础设施链路。
- 服务端主题通常不只关心功能正确,还关心稳定性、性能和可观测性。
- 任何框架能力都要结合配置、生命周期、异常传播和外部依赖一起看。
项目落地视角
- 画清请求进入、业务执行、外部调用、日志记录和错误返回的完整路径。
- 为关键链路补齐超时、重试、熔断、追踪和结构化日志。
- 把配置与敏感信息分离,并明确不同环境的差异来源。
常见误区
- 只会堆中间件或组件,不知道它们在链路中的执行顺序。
- 忽略生命周期和线程池、连接池等运行时资源约束。
- 没有监控和测试就对性能或可靠性下结论。
进阶路线
- 继续向运行时行为、可观测性、发布治理和微服务协同深入。
- 把主题和数据库、缓存、消息队列、认证授权联动起来理解。
- 沉淀团队级模板,包括统一异常处理、配置约定和基础设施封装。
适用场景
- 当你准备把《健康检查 Health Check》真正落到项目里时,最适合先在一个独立模块或最小样例里验证关键路径。
- 适合 API 服务、后台任务、实时通信、认证授权和微服务协作场景。
- 当需求开始涉及稳定性、性能、可观测性和发布流程时,这类主题会成为基础设施能力。
落地建议
- 先定义请求链路与失败路径,再决定中间件、过滤器、服务边界和依赖方式。
- 为关键链路补日志、指标、追踪、超时与重试策略。
- 环境配置与敏感信息分离,避免把生产参数写死在代码或镜像里。
排错清单
- 先确认问题发生在路由、模型绑定、中间件、业务层还是基础设施层。
- 检查 DI 生命周期、配置来源、序列化规则和认证上下文。
- 查看线程池、连接池、缓存命中率和外部依赖超时。
复盘问题
- 如果把《健康检查 Health Check》放进你的当前项目,最先要验证的输入、输出和失败路径分别是什么?
- 《健康检查 Health Check》最容易在什么规模、什么边界条件下暴露问题?你会用什么指标或日志去确认?
- 相比默认实现或替代方案,采用《健康检查 Health Check》最大的收益和代价分别是什么?
