一、引言:为什么需要监控?
1.1 微服务时代的监控挑战
在微服务架构盛行的今天,一个应用往往由多个服务组成,每个服务可能部署在不同的服务器上。传统的监控方式已无法满足需求:
服务数量激增:手动监控变得不现实
故障定位困难:问题可能在服务链的任何环节
性能瓶颈难寻:需要实时了解各服务状态
1.2 Spring Boot Actuator 的价值
Spring Boot Actuator 提供了生产级的监控功能,帮助我们:
✅实时健康检查:了解服务是否正常运行
✅性能指标收集:掌握系统运行状态
✅动态配置管理:无需重启调整参数
✅故障快速定位:提供详细的运行时信息
二、快速入门:第一个Actuator应用
2.1 环境准备
JDK 1.8+
Maven 3.2+ 或 Gradle
Spring Boot 2.x(本文基于2.7.x)
2.2 创建项目并添加依赖
Maven配置:
xml
<dependencies>
    <!-- Spring Boot web starter -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-web</artifactId>
    </dependency>

    <!-- Actuator core dependency -->
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-actuator</artifactId>
    </dependency>

    <!-- Exports metrics to Prometheus -->
    <dependency>
        <groupId>io.micrometer</groupId>
        <artifactId>micrometer-registry-prometheus</artifactId>
    </dependency>
</dependencies>
Gradle配置:
gradle
dependencies {
    // Web layer, Actuator endpoints, and the Prometheus metrics registry
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.springframework.boot:spring-boot-starter-actuator'
    implementation 'io.micrometer:micrometer-registry-prometheus'
}
2.3 基础配置
yaml
# application.yml
server:
  port: 8080

spring:
  application:
    name: actuator-demo

management:
  endpoints:
    web:
      exposure:
        include: "*"         # expose every endpoint (use with caution in production)
      base-path: /monitor    # custom base path for the monitoring endpoints
  endpoint:
    health:
      show-details: always   # always show health check details
  metrics:
    export:
      prometheus:
        enabled: true
2.4 启动并测试
创建一个简单的Spring Boot应用:
java
/**
 * Application entry point that also serves a single demo REST endpoint.
 */
@SpringBootApplication
@RestController
public class ActuatorApplication {

    /** Boots the Spring application with the given command-line arguments. */
    public static void main(String[] args) {
        SpringApplication.run(ActuatorApplication.class, args);
    }

    /** Handles {@code GET /hello} and answers with a fixed greeting. */
    @GetMapping("/hello")
    public String hello() {
        final String greeting = "Hello Actuator!";
        return greeting;
    }
}
启动后访问:http://localhost:8080/monitor/health
三、Actuator端点详解
3.1 健康检查端点(/health)
功能:应用健康状态检查
配置示例:
yaml
management:
  endpoint:
    health:
      show-details: always
      probes:
        enabled: true    # enable Kubernetes probe support
      group:
        readiness:
          include: db,redis,custom
        liveness:
          include: diskSpace,ping
自定义健康指示器:
java
@Component public class CustomHealthIndicator implements HealthIndicator { @Autowired private ExternalService externalService; @Override public Health health() { boolean isServiceUp = checkExternalService(); if (isServiceUp) { return Health.up() .withDetail("service", "external-service") .withDetail("responseTime", "200ms") .build(); } else { return Health.down() .withDetail("service", "external-service") .withDetail("error", "Connection timeout") .withException(new RuntimeException("Service unavailable")) .build(); } } private boolean checkExternalService() { // 实现检查逻辑 return externalService.isAvailable(); } }3.2 指标端点(/metrics)
功能:收集和展示应用指标
内置指标分类:
JVM指标:内存、线程、类加载
系统指标:CPU、磁盘、网络
应用指标:HTTP请求、数据库连接
缓存指标:Redis、Cache
自定义指标示例:
java
@Service public class OrderService { private final MeterRegistry meterRegistry; private final Counter orderCounter; private final Timer orderTimer; public OrderService(MeterRegistry meterRegistry) { this.meterRegistry = meterRegistry; // 创建计数器 this.orderCounter = Counter.builder("orders.total") .description("Total number of orders") .tag("service", "order-service") .register(meterRegistry); // 创建计时器 this.orderTimer = Timer.builder("orders.process.time") .description("Order processing time") .tag("service", "order-service") .register(meterRegistry); } public void processOrder(Order order) { // 记录执行时间 orderTimer.record(() -> { // 业务逻辑 createOrder(order); orderCounter.increment(); }); } // 创建Gauge(瞬时值) @PostConstruct public void initGauge() { Gauge.builder("orders.queue.size", this, OrderService::getQueueSize) .description("Current order queue size") .register(meterRegistry); } private int getQueueSize() { // 返回当前队列大小 return 10; } }3.3 信息端点(/info)
功能:展示应用信息
配置方式:
yaml
# Since Spring Boot 2.6 the contributor that publishes `info.*` properties is
# disabled by default, so it has to be switched on for the block below to
# appear under /info.
management:
  info:
    env:
      enabled: true

info:
  app:
    name: "@project.name@"
    version: "@project.version@"
    description: "订单服务系统"
  build:
    artifact: "@project.artifactId@"
    group: "@project.groupId@"
    # NOTE(review): `build.time` must be defined somewhere (e.g. by the build);
    # confirm it resolves, otherwise this placeholder cannot be expanded.
    time: "${build.time}"
  java:
    version: "@java.version@"
  contact:
    email: "admin@example.com"
    phone: "400-123-4567"
编程方式添加信息:
java
/**
 * Adds build and environment details to the {@code /info} endpoint.
 *
 * <p>Every injected property has a fallback so that a missing
 * {@code git.properties} or build timestamp does not prevent startup.
 */
@Component
public class BuildInfoContributor implements InfoContributor {

    @Value("${git.commit.id.abbrev:unknown}")
    private String commitId;

    @Value("${git.branch:unknown}")
    private String branch;

    // Read from configuration: a timestamp taken while serving the request
    // would change on every call and is not the build time.
    @Value("${build.time:unknown}")
    private String buildTime;

    @Value("${spring.profiles.active:default}")
    private String activeProfile;

    /**
     * Contributes a {@code build} section and an {@code environment} section.
     *
     * @param builder the info builder supplied by the endpoint
     */
    @Override
    public void contribute(Info.Builder builder) {
        Map<String, Object> details = new HashMap<>();
        details.put("commitId", commitId);
        details.put("branch", branch);
        details.put("buildTime", buildTime);
        details.put("deployedBy", System.getProperty("user.name"));

        builder.withDetail("build", details)
                .withDetail("environment", getEnvironmentInfo());
    }

    /** Collects the active profile and the operating system name. */
    private Map<String, String> getEnvironmentInfo() {
        Map<String, String> env = new HashMap<>();
        env.put("activeProfile", getActiveProfile());
        env.put("os", System.getProperty("os.name"));
        return env;
    }

    /** Returns the configured {@code spring.profiles.active} value, or {@code default}. */
    private String getActiveProfile() {
        return activeProfile;
    }
}
3.4 环境端点(/env)
功能:查看所有环境配置
敏感信息过滤:
java
@Configuration public class EnvEndpointConfig { @Bean @ConditionalOnBean(EnvironmentEndpoint.class) public EnvironmentEndpoint environmentEndpoint(Environment environment) { EnvironmentEndpoint endpoint = new EnvironmentEndpoint(environment); // 添加转换器,隐藏敏感信息 endpoint.setKeysToSanitize("password", "secret", "key", "token", ".*credentials.*", "vcap_services"); return endpoint; } }3.5 配置端点(/configprops)
功能:显示@ConfigurationProperties的内容
示例配置类:
java
/**
 * Validated configuration properties bound from the {@code app.datasource} prefix.
 */
@Data
@Validated
@Configuration
@ConfigurationProperties(prefix = "app.datasource")
public class DataSourceProperties {

    /** Connection URL; must not be empty. */
    @NotEmpty
    private String url;

    /** Login user name; must not be empty. */
    @NotEmpty
    private String username;

    /** Login password; must not be empty. */
    @NotEmpty
    private String password;

    /** Maximum pool size; at least 1, defaults to 10. */
    @Min(1)
    private int maxPoolSize = 10;

    /** Connection timeout; bare numbers are read as seconds, defaults to 30 seconds. */
    @DurationUnit(ChronoUnit.SECONDS)
    private Duration connectionTimeout = Duration.ofSeconds(30);
}
3.6 日志级别端点(/loggers)
功能:动态调整日志级别
使用示例:
java
@RestController @RequestMapping("/api/logging") public class LoggingController { private final LoggersEndpoint loggersEndpoint; @PostMapping("/level") public ResponseEntity<?> changeLogLevel( @RequestParam String loggerName, @RequestParam String level) { // 验证日志级别 if (!isValidLogLevel(level)) { return ResponseEntity.badRequest() .body("Invalid log level: " + level); } // 构建配置 LoggersEndpoint.LoggerLevels levels = new LoggersEndpoint.LoggerLevels(level, level); // 应用配置 loggersEndpoint.configureLogLevel(loggerName, levels); return ResponseEntity.ok("Log level updated"); } @GetMapping("/levels") public Map<String, Object> getLogLevels() { return loggersEndpoint.loggers(); } }3.7 线程端点(/threaddump)
功能:获取线程转储信息
自定义线程分析:
java
@Component public class ThreadAnalysisService { @Autowired private ThreadDumpEndpoint threadDumpEndpoint; public ThreadAnalysisResult analyze() { ThreadDumpDescriptor dump = threadDumpEndpoint.threadDump(); ThreadAnalysisResult result = new ThreadAnalysisResult(); // 分析线程状态 Map<Thread.State, Long> stateCount = dump.getThreads().stream() .collect(Collectors.groupingBy( ThreadDumpEndpoint.ThreadInfo::getThreadState, Collectors.counting() )); // 检测死锁 List<ThreadDumpEndpoint.ThreadInfo> deadlockedThreads = dump.getThreads().stream() .filter(ThreadDumpEndpoint.ThreadInfo::isInDeadlock) .collect(Collectors.toList()); // 分析线程CPU占用 List<ThreadDumpEndpoint.ThreadInfo> highCpuThreads = dump.getThreads().stream() .filter(t -> t.getCpuTime() > 1000000000L) // 1秒 .sorted(Comparator.comparingLong( ThreadDumpEndpoint.ThreadInfo::getCpuTime).reversed()) .limit(10) .collect(Collectors.toList()); result.setStateDistribution(stateCount); result.setDeadlockedThreads(deadlockedThreads); result.setHighCpuThreads(highCpuThreads); return result; } }3.8 其他重要端点
| 端点路径 | 功能 | 生产环境建议 |
|---|---|---|
| /beans | 显示所有Spring Beans | 谨慎暴露 |
| /mappings | 显示所有@RequestMapping | 可暴露 |
| /conditions | 显示自动配置条件 | 开发环境使用 |
| /scheduledtasks | 显示定时任务 | 可暴露 |
| /httptrace | 显示HTTP请求跟踪 | 谨慎暴露 |
| /caches | 显示缓存信息 | 可暴露 |
四、安全配置
4.1 集成Spring Security
xml
<!-- Spring Security starter -->
<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-security</artifactId>
</dependency>
4.2 安全配置类
java
@Configuration @EnableWebSecurity public class ActuatorSecurityConfig extends WebSecurityConfigurerAdapter { @Override protected void configure(HttpSecurity http) throws Exception { http .authorizeRequests() // 健康检查端点允许所有人访问 .antMatchers("/monitor/health", "/monitor/info").permitAll() // Prometheus端点需要监控权限 .antMatchers("/monitor/prometheus").hasRole("MONITOR") // 其他监控端点需要管理员权限 .antMatchers("/monitor/**").hasRole("ADMIN") .anyRequest().authenticated() .and() .httpBasic() .and() .csrf() .disable() .sessionManagement() .sessionCreationPolicy(SessionCreationPolicy.STATELESS); } @Override protected void configure(AuthenticationManagerBuilder auth) throws Exception { auth.inMemoryAuthentication() .withUser("monitor") .password(passwordEncoder().encode("monitor123")) .roles("MONITOR") .and() .withUser("admin") .password(passwordEncoder().encode("admin123")) .roles("ADMIN", "MONITOR"); } @Bean public PasswordEncoder passwordEncoder() { return new BCryptPasswordEncoder(); } }4.3 JWT认证集成
java
@Component public class JwtActuatorFilter extends GenericFilterBean { @Override public void doFilter(ServletRequest request, ServletResponse response, FilterChain chain) throws IOException, ServletException { HttpServletRequest httpRequest = (HttpServletRequest) request; String path = httpRequest.getRequestURI(); // 只处理监控端点 if (path.startsWith("/monitor/")) { String token = extractToken(httpRequest); if (token != null && validateToken(token)) { // 验证通过,设置安全上下文 Authentication auth = createAuthentication(token); SecurityContextHolder.getContext().setAuthentication(auth); } else { // 验证失败,返回401 HttpServletResponse httpResponse = (HttpServletResponse) response; httpResponse.setStatus(HttpStatus.UNAUTHORIZED.value()); return; } } chain.doFilter(request, response); } // JWT验证逻辑... }五、高级特性
5.1 自定义端点
java
@Component @Endpoint(id = "custom") public class CustomEndpoint { private final List<Operation> operations = new ArrayList<>(); @ReadOperation public CustomData getData() { CustomData data = new CustomData(); data.setTimestamp(LocalDateTime.now()); data.setStatus("OK"); data.setMetrics(getSystemMetrics()); return data; } @WriteOperation public String executeCommand(@Selector String command, @Nullable String param) { switch (command) { case "clearCache": clearCache(); return "Cache cleared"; case "reloadConfig": reloadConfig(); return "Config reloaded"; default: return "Unknown command"; } } @DeleteOperation public String deleteResource(@Selector String resourceId) { // 删除资源逻辑 return "Resource deleted: " + resourceId; } private Map<String, Object> getSystemMetrics() { Map<String, Object> metrics = new HashMap<>(); // 获取系统指标 Runtime runtime = Runtime.getRuntime(); metrics.put("freeMemory", runtime.freeMemory()); metrics.put("totalMemory", runtime.totalMemory()); metrics.put("maxMemory", runtime.maxMemory()); metrics.put("availableProcessors", runtime.availableProcessors()); // 获取线程数 metrics.put("threadCount", Thread.activeCount()); return metrics; } }5.2 端点分组
yaml
# NOTE(review): the `group` section below does not look like a documented
# Actuator property, and its nesting level was reconstructed from a flattened
# snippet. Verify against the Spring Boot reference before relying on it.
management:
  endpoints:
    web:
      exposure:
        include: "health,info,custom"
      group:
        web:
          include: "mappings,httptrace"
        database:
          include: "datasource,hikari"
          base-path: "/database"
        jvm:
          include: "heapdump,threaddump,jolokia"
          base-path: "/jvm"
5.3 响应缓存
java
/**
 * Registers a caching filter in front of the metrics endpoints.
 */
@Configuration
public class EndpointCacheConfig {

    /**
     * Maps a {@code CachingFilter} to {@code /monitor/metrics/*} and passes it
     * a {@code cache-control} init parameter.
     * NOTE(review): {@code CachingFilter} is not defined in this snippet;
     * confirm which implementation is intended.
     */
    @Bean
    public FilterRegistrationBean<CachingFilter> cachingFilter() {
        FilterRegistrationBean<CachingFilter> bean =
                new FilterRegistrationBean<>(new CachingFilter());
        bean.addUrlPatterns("/monitor/metrics/*");
        bean.addInitParameter("cache-control", "max-age=60, public");
        return bean;
    }
}
六、集成监控系统
6.1 Prometheus集成
yaml
management:
  metrics:
    export:
      prometheus:
        enabled: true
        # Step size used by the registry; the scrape interval itself is
        # configured on the Prometheus server, not here.
        step: 1m
        descriptions: true
      graphite:
        enabled: false
      influx:
        enabled: false
    distribution:
      percentiles-histogram:
        http.server.requests: true
      # `slo` replaces the deprecated `sla` key (Spring Boot 2.3+).
      slo:
        http.server.requests: 100ms, 200ms, 500ms, 1s
    tags:
      application: ${spring.application.name}
      instance: ${HOSTNAME:${random.value}}
6.2 Grafana仪表板配置
创建grafana-dashboard.json:
json
{
  "dashboard": {
    "title": "Spring Boot Actuator Metrics",
    "panels": [
      {
        "title": "JVM Memory",
        "targets": [
          {
            "expr": "jvm_memory_used_bytes{area=\"heap\"}",
            "legendFormat": "Heap Used"
          },
          {
            "expr": "jvm_memory_max_bytes{area=\"heap\"}",
            "legendFormat": "Heap Max"
          }
        ]
      }
    ]
  }
}
6.3 ELK集成
java
/**
 * Ships application logs to Logstash over TCP.
 *
 * <p>A Logback appender only receives events once it has a logger context,
 * has been started, and is attached to a logger. Exposing it as a Spring bean
 * alone does none of these, so all three steps are done here.
 */
@Configuration
public class ElkConfig {

    /**
     * Creates the Logstash appender, starts it and attaches it to the root logger.
     * NOTE(review): assumes Logback is the active SLF4J binding; the cast
     * below fails otherwise.
     */
    @Bean
    public LogstashTcpSocketAppender logstashAppender() {
        ch.qos.logback.classic.LoggerContext loggerContext =
                (ch.qos.logback.classic.LoggerContext) org.slf4j.LoggerFactory.getILoggerFactory();

        LogstashEncoder encoder = logstashEncoder();
        encoder.setContext(loggerContext);

        LogstashTcpSocketAppender appender = new LogstashTcpSocketAppender();
        appender.setName("logstash");
        appender.setContext(loggerContext);
        appender.setRemoteHost("localhost");
        appender.setPort(5000);
        appender.setEncoder(encoder);
        appender.start();

        // Without this the appender never sees a log event.
        loggerContext.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME).addAppender(appender);
        return appender;
    }

    /** JSON encoder used by the Logstash appender. */
    @Bean
    public LogstashEncoder logstashEncoder() {
        return new LogstashEncoder();
    }
}
七、性能优化
7.1 端点性能优化
yaml
management:
  endpoint:
    health:
      enabled: true
      cache:
        time-to-live: 10s    # cache the response for 10 seconds
    metrics:
      enabled: true
    prometheus:
      enabled: true
  endpoints:
    web:
      base-path: /manage
      exposure:
        include: "health,info,prometheus"
7.2 异步端点
java
@Component @Endpoint(id = "async-metrics") public class AsyncMetricsEndpoint { @ReadOperation public CompletableFuture<MetricsData> getMetrics() { return CompletableFuture.supplyAsync(() -> { // 耗时的指标收集逻辑 return collectMetrics(); }); } private MetricsData collectMetrics() { // 模拟耗时操作 try { Thread.sleep(1000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } MetricsData data = new MetricsData(); // 填充数据 return data; } }7.3 批量操作
java
/**
 * Endpoint {@code batch-operations} that runs several operations in one call.
 */
@Component
@Endpoint(id = "batch-operations")
public class BatchOperationsEndpoint {

    /**
     * Executes every operation of the request and records each outcome.
     *
     * <p>Operations run sequentially: the single {@code BatchResult} is
     * mutated for every operation, and nothing here shows it to be safe for
     * concurrent writes from a parallel stream.
     * NOTE(review): {@code @RequestBody} is a Spring MVC annotation; confirm
     * that an Actuator write operation can bind a complex request object this way.
     *
     * @param request the batch of operations to run
     * @return the ids that succeeded and the ids that failed with their messages
     */
    @WriteOperation
    public BatchResult executeBatch(@RequestBody BatchRequest request) {
        BatchResult result = new BatchResult();
        request.getOperations().forEach(op -> {
            try {
                executeSingleOperation(op);
                result.addSuccess(op.getId());
            } catch (Exception e) {
                // One failing operation must not abort the rest of the batch.
                result.addFailure(op.getId(), e.getMessage());
            }
        });
        return result;
    }
}
八、生产环境最佳实践
8.1 安全配置清单
yaml
# production-actuator.yml
management:
  endpoints:
    web:
      exposure:
        include: "health,info,metrics,prometheus"
      base-path: /internal/monitor
      path-mapping:
        health: "status"
    jmx:
      exposure:
        exclude: "*"
  endpoint:
    health:
      show-details: when-authorized
      roles: "ACTUATOR"
    shutdown:
      enabled: false
  server:
    port: 9090            # serve management endpoints on a separate port
    address: 127.0.0.1    # accept local connections only
  metrics:
    export:
      prometheus:
        enabled: true
8.2 监控告警配置
java
@Component public class HealthAlertService { @EventListener public void handleHealthChanged(HealthEndpointWebExtension.HealthResult result) { HealthComponent health = result.getHealth(); if (health.getStatus() == Status.DOWN) { // 发送告警 sendAlert("服务健康状态异常", health.getDetails()); } // 检查特定组件 if (health.getComponents().containsKey("db")) { HealthComponent dbHealth = health.getComponents().get("db"); if (dbHealth.getStatus() == Status.DOWN) { sendAlert("数据库连接异常", dbHealth.getDetails()); } } } private void sendAlert(String title, Map<String, Object> details) { // 实现告警发送逻辑 // 1. 发送邮件 // 2. 发送短信 // 3. 发送到监控平台 } }8.3 多环境配置
yaml
# application-dev.yml
management:
  endpoints:
    web:
      exposure:
        include: "*"

# application-prod.yml (a separate file)
management:
  endpoints:
    web:
      exposure:
        include: "health,info,prometheus"
      base-path: /manage
  server:
    port: 9090
    ssl:
      enabled: true
九、故障排查
9.1 常见问题
问题1:端点无法访问
解决方案:
yaml
# Check the configuration
management:
  endpoints:
    web:
      exposure:
        include: "health"
      base-path: /actuator    # default path
问题2:健康检查显示DOWN
排查步骤:
检查依赖服务连接
检查磁盘空间
查看详细错误信息
bash
# Request the health endpoint, asking for a JSON response
curl -H "Accept: application/json" http://localhost:8080/monitor/health
问题3:指标数据不准确
排查步骤:
yaml
# Enable debug logging
logging:
  level:
    io.micrometer: DEBUG
    org.springframework.boot.actuate: DEBUG
9.2 诊断工具
java
@Component public class ActuatorDiagnosticTool { @Autowired private ApplicationContext context; public void diagnose() { // 1. 检查端点Bean Map<String, Endpoint> endpoints = context.getBeansOfType(Endpoint.class); log.info("找到{}个端点", endpoints.size()); // 2. 检查健康指示器 Map<String, HealthIndicator> indicators = context.getBeansOfType(HealthIndicator.class); // 3. 检查Web端点暴露情况 EndpointDiscoverer discoverer = context.getBean(EndpointDiscoverer.class); Collection<ExposableEndpoint<?>> exposed = discoverer.getEndpoints(); } }十、扩展与定制
10.1 自定义健康检查聚合
java
@Component public class CompositeHealthAggregator implements HealthAggregator { @Override public Health aggregate(Map<String, Health> healths) { StatusAggregator aggregator = new OrderedHealthAggregator(); Status status = aggregator.getAggregateStatus( healths.values().stream() .map(Health::getStatus) .collect(Collectors.toSet()) ); Health.Builder builder = Health.status(status); // 添加详细组件信息 healths.forEach((name, health) -> { builder.withDetail(name, health.getDetails()); }); // 添加聚合指标 builder.withDetail("totalComponents", healths.size()); builder.withDetail("upComponents", healths.values().stream() .filter(h -> h.getStatus() == Status.UP) .count() ); return builder.build(); } }10.2 自定义指标导出器
java
@Component public class CustomMetricsExporter implements MeterRegistryCustomizer { @Override public void customize(MeterRegistry registry) { // 添加公共标签 registry.config().commonTags( "application", "my-app", "region", System.getenv("REGION"), "zone", System.getenv("ZONE") ); // 自定义指标命名约定 registry.config().namingConvention(new NamingConvention() { @Override public String name(String name, Meter.Type type, String baseUnit) { // 统一添加前缀 return "app_" + name.replace('.', '_'); } }); } }10.3 响应式端点支持
java
/**
 * Web extension of the health endpoint that runs the blocking health checks
 * on the bounded-elastic scheduler and exposes them as {@code Mono}s.
 *
 * NOTE(review): Spring Boot may already register its own web extension for
 * {@code HealthEndpoint}; confirm that adding a second one is accepted.
 */
@Component
@EndpointWebExtension(endpoint = HealthEndpoint.class)
public class ReactiveHealthEndpointExtension {

    private final HealthEndpoint delegate;

    /**
     * @param delegate the blocking health endpoint to wrap
     */
    public ReactiveHealthEndpointExtension(HealthEndpoint delegate) {
        this.delegate = delegate;
    }

    /** Returns the overall health, computed off the calling thread. */
    @ReadOperation
    public Mono<HealthComponent> health() {
        return Mono.fromCallable(delegate::health)
                .subscribeOn(Schedulers.boundedElastic());
    }

    /**
     * Returns the health of the component addressed by the remaining path segments.
     *
     * @param path the path segments after {@code /health}
     */
    @ReadOperation
    public Mono<HealthComponent> getHealth(
            @Selector(match = Selector.Match.ALL_REMAINING) String... path) {
        return Mono.fromCallable(() -> delegate.healthForPath(path))
                .subscribeOn(Schedulers.boundedElastic());
    }
}
结语
Spring Boot Actuator 是一个功能强大、可扩展性极高的监控解决方案。通过本文的学习,你应该能够:
✅理解Actuator的核心概念和架构
✅熟练配置和使用各种端点
✅实现自定义监控指标和健康检查
✅保障监控系统的安全性
✅集成主流监控平台
✅优化监控系统性能
✅在生产环境中正确部署
记住,监控系统的建设不是一蹴而就的,需要根据业务需求不断调整和优化。建议从核心指标开始,逐步完善监控体系。
附录
A. 常用端点参考表
| 端点 | 路径 | 描述 | 生产建议 |
|---|---|---|---|
| 健康检查 | /actuator/health | 应用健康状态 | ✅ 必开 |
| 指标 | /actuator/metrics | 各类指标 | ✅ 必开 |
| 信息 | /actuator/info | 应用信息 | ✅ 建议开 |
| 环境 | /actuator/env | 环境变量 | ⚠️ 谨慎 |
| 配置属性 | /actuator/configprops | 配置属性 | ⚠️ 谨慎 |
| 日志级别 | /actuator/loggers | 日志管理 | ⚠️ 谨慎 |
| 线程转储 | /actuator/threaddump | 线程信息 | ⚠️ 需要时开 |
| 堆转储 | /actuator/heapdump | 堆信息 | ⚠️ 需要时开 |
B. 推荐阅读
Spring Boot官方文档 - Actuator
Micrometer官方文档
Prometheus监控指南