1. SpringBoot Actuator监控端点配置
Spring Boot Actuator 提供了丰富的生产就绪特性来监控和管理应用。以下是 Actuator 监控端点的详细配置指南:
基本依赖
<dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-actuator</artifactId> </dependency>端点配置
2.1 启用/禁用端点
management:
  endpoints:
    web:
      exposure:
        # 暴露所有端点(生产环境谨慎使用)
        include: "*"
        # 或指定特定端点
        # include: health,info,metrics,prometheus
        # 禁用所有端点
        # exposure:
        #   exclude: "*"
    # 所有端点的默认启用状态(单个端点通过 management.endpoint.<id>.enabled 单独控制)
    enabled-by-default: true
    jmx:
      exposure:
        include: "*"
2.2 常用端点列表
management: endpoint: # 健康检查 health: enabled: true show-details: always # always, when-authorized, never show-components: always # 应用信息 info: enabled: true # 指标数据 metrics: enabled: true # Prometheus 格式指标 prometheus: enabled: true # 线程转储 threaddump: enabled: true # 堆转储 heapdump: enabled: true # 配置属性 configprops: enabled: true # 环境变量 env: enabled: true # 日志级别管理 loggers: enabled: true # HTTP 请求追踪 httptrace: enabled: true # 审计事件 auditevents: enabled: true # 调度任务 scheduledtasks: enabled: true # Spring Beans beans: enabled: true # 条件评估报告 conditions: enabled: true # 映射信息 mappings: enabled: true # 关闭应用(生产环境禁用!) shutdown: enabled: false安全配置
3.1 基本安全
spring: security: user: name: admin password: ${ACTUATOR_PASSWORD:admin123} roles: ACTUATOR_ADMIN management: endpoints: web: base-path: /manage # 自定义基础路径 path-mapping: health: healthcheck exposure: include: health,info,metrics3.2 Spring Security 集成
/**
 * Security rules that apply to the Actuator endpoints only.
 *
 * <p>{@code health} and {@code info} are open to everyone, every other endpoint requires
 * the {@code ACTUATOR_ADMIN} role, and credentials are supplied through HTTP Basic.
 */
@Configuration
@EnableWebSecurity
public class ActuatorSecurityConfig extends WebSecurityConfigurerAdapter {

    @Override
    protected void configure(HttpSecurity http) throws Exception {
        http
            // Scope this configuration to requests that target an Actuator endpoint.
            .requestMatcher(EndpointRequest.toAnyEndpoint())
            .authorizeRequests()
            // The public endpoints are listed ahead of the catch-all rule below.
            .requestMatchers(EndpointRequest.to("health", "info"))
            .permitAll()
            .requestMatchers(EndpointRequest.toAnyEndpoint())
            .hasRole("ACTUATOR_ADMIN")
            .and()
            .httpBasic();
    }
}
健康检查配置
4.1 自定义健康指示器
@Component public class CustomHealthIndicator implements HealthIndicator { @Override public Health health() { boolean isHealthy = checkService(); if (isHealthy) { return Health.up() .withDetail("service", "available") .withDetail("responseTime", "100ms") .build(); } else { return Health.down() .withDetail("service", "unavailable") .withDetail("error", "Connection timeout") .build(); } } private boolean checkService() { // 实现健康检查逻辑 return true; } }4.2 健康检查组
management: endpoint: health: show-details: when-authorized group: custom: include: diskSpace,customHealthIndicator readiness: include: db,redis liveness: include: ping # 健康检查状态顺序 status: order: "DOWN,OUT_OF_SERVICE,UP,UNKNOWN"指标配置
5.1 Micrometer 配置
management: metrics: export: # Prometheus 配置 prometheus: enabled: true step: 1m descriptions: true # InfluxDB 配置 influx: enabled: false db: springboot uri: http://localhost:8086 step: 1m # StatsD 配置 statsd: enabled: false host: localhost port: 8125 flavor: datadog # 自定义指标标签 tags: application: ${spring.application.name} environment: ${spring.profiles.active:default} # 启用/禁用特定指标 enable: jvm: true system: true logback: true tomcat: true http: true process: true # 指标分布统计 distribution: percentiles-histogram: http.server.requests: true percentiles: http.server.requests: 0.5,0.95,0.99 sla: http.server.requests: 10ms,50ms,100ms,200ms,500ms,1s,2s5.2 自定义指标
/**
 * Example service that publishes two custom Micrometer meters: a counter of processed
 * orders and a timer measuring how long each order takes.
 */
@Service
public class OrderService {

    private final MeterRegistry meterRegistry;
    private final Counter orderCounter;
    private final Timer orderTimer;

    /**
     * Registers the {@code orders.total} counter and the {@code orders.process.time} timer.
     *
     * @param meterRegistry registry the meters are registered with
     */
    public OrderService(MeterRegistry meterRegistry) {
        this.meterRegistry = meterRegistry;

        // Counter of all orders, tagged with the owning service.
        Counter.Builder counterSpec = Counter.builder("orders.total")
            .description("Total number of orders")
            .tag("service", "order-service");
        this.orderCounter = counterSpec.register(meterRegistry);

        // Timer for the order-processing duration.
        Timer.Builder timerSpec = Timer.builder("orders.process.time")
            .description("Time taken to process orders");
        this.orderTimer = timerSpec.register(meterRegistry);
    }

    /**
     * Counts one order and records the time spent processing it.
     */
    public void processOrder() {
        orderCounter.increment();
        Runnable work = this::handleOrder;
        orderTimer.record(work);
    }

    /**
     * Stand-in for the real order logic: sleeps for 100 ms and restores the interrupt
     * flag if the sleep is interrupted.
     */
    private void handleOrder() {
        // Order processing logic goes here.
        try {
            Thread.sleep(100);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}
信息端点配置
management: info: env: enabled: true java: enabled: true os: enabled: true info: app: name: "@project.name@" version: "@project.version@" description: "@project.description@" environment: ${spring.profiles.active} contact: email: admin@example.com phone: 123-456-7890 custom: feature: "New payment system" status: "Active"日志管理
management: endpoint: loggers: enabled: true logging: level: root: INFO com.example: DEBUG org.springframework.web: INFO通过 /actuator/loggers 端点动态调整日志级别:
# 查看所有日志级别 GET /actuator/loggers # 查看特定包日志级别 GET /actuator/loggers/com.example # 修改日志级别 POST /actuator/loggers/com.example Content-Type: application/json { "configuredLevel": "DEBUG" }HTTP 追踪
management: tracing: sampling: probability: 1.0 # 采样率 endpoint: httptrace: enabled: true生产环境建议配置
# application-prod.yml management: endpoints: web: exposure: include: health,info,metrics,prometheus base-path: /internal endpoint: health: show-details: when-authorized probes: enabled: true group: readiness: include: "*" liveness: include: ping,diskSpace metrics: export: prometheus: enabled: true tags: application: ${spring.application.name} environment: prod region: ${REGION:unknown} server: port: 9090 # 与主应用端口分离 # 添加延迟,防止启动时立即标记为就绪 health: readiness-state: enabled: true liveness-state: enabled: true # 健康检查路径 spring: application: name: my-service # Kubernetes 探针配置 cloud: kubernetes: probes: liveness: path: /internal/health/liveness initial-delay: 60 period: 10 readiness: path: /internal/health/readiness initial-delay: 30 period: 10自定义端点
/**
 * Example custom Actuator endpoint with id {@code custom}, exposing one read, one write
 * and one delete operation.
 */
@Component
@Endpoint(id = "custom")
public class CustomEndpoint {

    /**
     * Read operation.
     *
     * @return a map with a fixed status, the current timestamp and a fixed version string
     */
    @ReadOperation
    public Map<String, Object> getInfo() {
        Map<String, Object> payload = new HashMap<>();
        payload.put("status", "OK");
        payload.put("timestamp", Instant.now());
        payload.put("version", "1.0.0");
        return payload;
    }

    /**
     * Write operation; the operation name is taken from the request path.
     *
     * @param operation name of the operation to run
     * @return a confirmation message echoing the operation name
     */
    @WriteOperation
    public String executeOperation(@Selector String operation) {
        return String.format("Executed: %s", operation);
    }

    /**
     * Delete operation.
     *
     * @return a fixed confirmation message
     */
    @DeleteOperation
    public String reset() {
        return "Reset completed";
    }
}
监控集成
Prometheus + Grafana
# prometheus.yml scrape_configs: - job_name: 'spring-boot' metrics_path: '/actuator/prometheus' static_configs: - targets: ['localhost:8080'] scrape_interval: 15s监控面板建议
- JVM 监控 :堆内存、GC、线程数
- HTTP 监控 :请求率、延迟、错误率
- 系统监控 :CPU、内存、磁盘
- 业务监控 :自定义指标
注意事项
- 生产安全 :确保敏感端点(如 shutdown, env)受到保护
- 性能影响 :某些端点(如 heapdump)可能影响性能
- 网络隔离 :将监控端点限制在内网访问
- 版本兼容 :不同 Spring Boot 版本端点可能有所差异
- 资源限制 :合理设置端点响应数据大小
这个配置指南涵盖了 Spring Boot Actuator 的主要功能,可以根据实际需求进行调整。
2. SpringBoot Admin可视化监控平台
一、什么是 Spring Boot Admin?
Spring Boot Admin 是一个用于管理和监控 Spring Boot 应用程序的开源工具,它提供了:
- 统一的 Web UI 管理界面
- 实时监控应用状态
- 详细的健康检查
- 日志查看和管理
- JMX beans 管理
- 线程和内存监控
- 环境属性查看
二、核心架构
两个主要角色
- Admin Server :监控中心服务器
- Admin Client :被监控的 Spring Boot 应用
通信方式
- HTTP(通过 Actuator 端点)
- Spring Cloud Discovery(Eureka, Consul, Nacos)
- 直接注册
三、快速开始
Admin Server 搭建
<!-- pom.xml --> <dependency> <groupId>de.codecentric</groupId> <artifactId>spring-boot-admin-starter-server</artifactId> <version>2.7.10</version> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency># application.yml server: port: 8080 spring: application: name: admin-serverClient 应用配置
<!-- client pom.xml --> <dependency> <groupId>de.codecentric</groupId> <artifactId>spring-boot-admin-starter-client</artifactId> <version>2.7.10</version> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-actuator</artifactId> </dependency> <dependency> <groupId>org.springframework.boot</groupId> <artifactId>spring-boot-starter-web</artifactId> </dependency># client application.yml spring: application: name: user-service boot: admin: client: url: http://localhost:8080 # Admin Server地址 instance: service-url: http://localhost:8081 server: port: 8081 management: endpoints: web: exposure: include: "*" # 暴露所有监控端点 endpoint: health: show-details: always四、核心功能详解
应用监控面板
- 应用列表 :显示所有注册的应用
- 状态概览 :UP/DOWN/OFFLINE
- 详细信息 :版本、运行时间、IP地址
健康检查
- 数据库健康状态
- 磁盘空间
- 自定义健康指示器
性能监控
# 开启性能监控 management: metrics: export: prometheus: enabled: true enable: jvm: true system: true logback: true日志管理
// 动态调整日志级别 @RestController public class LoggingController { @PostMapping("/actuator/loggers/{name}") public void configureLogLevel(@PathVariable String name, @RequestBody Map<String, String> data) { // 动态修改日志级别 } }五、高级配置
安全配置
/**
 * Web security for the Admin Server: static assets and the login page are public,
 * every other request requires an authenticated user.
 */
@Configuration
public class SecurityConfig extends WebSecurityConfigurerAdapter {

    @Override
    protected void configure(HttpSecurity http) throws Exception {
        http
            .authorizeRequests()
            // Public resources.
            .antMatchers("/assets/**").permitAll()
            .antMatchers("/login").permitAll()
            // Everything else needs a logged-in user.
            .anyRequest().authenticated()
            .and()
            // Form login served from /login.
            .formLogin().loginPage("/login")
            .and()
            .logout().logoutUrl("/logout")
            .and()
            // HTTP Basic is enabled next to form login — presumably for client registration; verify.
            .httpBasic()
            .and()
            // CSRF protection is switched off for all requests.
            .csrf().disable();
    }
}
邮件/钉钉通知
spring: boot: admin: notify: mail: enabled: true to: admin@example.com from: monitor@example.com dingtalk: enabled: true webhook-url: https://oapi.dingtalk.com/robot/send secret: your-secret集成 Prometheus + Grafana
management: endpoints: web: exposure: include: health,info,prometheus metrics: export: prometheus: enabled: true六、集群部署方案
使用 Nacos 服务发现
# Admin Server spring: cloud: nacos: discovery: server-addr: localhost:8848 boot: admin: context-path: /admin # Client 应用 spring: cloud: nacos: discovery: server-addr: localhost:8848 boot: admin: client: url: http://localhost:8080/admin多 Admin Server 负载均衡
[Nginx/LB] | --------------------------------- | | [Admin Server 1] [Admin Server 2] | | --------------------------------- | [Nacos/Eureka] | --------------------------------- | | | [Service A] [Service B] [Service C]七、最佳实践
生产环境配置建议
spring: boot: admin: # 启用状态缓存,减少频繁查询 monitor: status-interval: 10s status-lifetime: 10s # 通知配置 notify: reminder: enabled: true period: 60s自定义监控指标
/**
 * Example health indicator that reflects the availability of an external dependency.
 */
@Component
public class CustomHealthIndicator implements HealthIndicator {

    /**
     * Builds the health report for the external service.
     *
     * @return UP with {@code externalService=available}, or DOWN with
     *         {@code externalService=unavailable}
     */
    @Override
    public Health health() {
        // Check the external dependency.
        boolean externalServiceUp = checkExternalService();
        if (externalServiceUp) {
            return Health.up()
                .withDetail("externalService", "available")
                .build();
        } else {
            return Health.down()
                .withDetail("externalService", "unavailable")
                .build();
        }
    }

    /**
     * Placeholder probe so the example is self-contained; replace it with a real check
     * of the external service.
     *
     * @return always {@code true} in this example
     */
    private boolean checkExternalService() {
        // Implement the actual check here.
        return true;
    }
}
监控大屏展示
# 自定义监控面板
dashboard:
  views:
    custom-view:
      title: "业务监控"
      order: 0
      widgets:
        - title: "今日订单量"
          type: "counter"
          url: "/actuator/metrics/orders.today"
八、常见问题解决
客户端无法注册
- 检查网络连通性
- 确认 Actuator 端点已暴露
- 验证安全配置
监控数据不更新
# 调整请求超时时间(毫秒);状态轮询间隔由 spring.boot.admin.monitor.status-interval 控制
spring:
  boot:
    admin:
      monitor:
        default-timeout: 10000
内存泄漏监控
/**
 * Logs a warning whenever a {@code HeapDumpWebRequestEvent} is delivered to this listener.
 *
 * NOTE(review): neither {@code HeapDumpWebRequestEvent} nor the {@code logger} field is
 * declared in this snippet — confirm both exist in the enclosing class / on the classpath
 * before relying on it.
 */
@EventListener
public void handleOOMEvent(HeapDumpWebRequestEvent event) {
    // Handle the heap dump event.
    logger.warn("Potential memory leak detected!");
}
九、替代方案比较
工具 | 优点 | 缺点 |
Spring Boot Admin | 与Spring Boot深度集成,配置简单 | 功能相对基础 |
Prometheus + Grafana | 功能强大,可视化丰富 | 配置复杂,需要额外组件 |
SkyWalking | 全链路追踪,APM功能强 | 资源消耗较大 |
ELK Stack | 日志分析强大 | 主要用于日志,监控功能有限 |
十、总结
Spring Boot Admin 是中小型 Spring Boot 项目监控的理想选择,它:
- 开箱即用 :简单配置即可获得完整监控能力
- 轻量级 :资源消耗小,部署简单
- 功能全面 :覆盖了应用监控的主要需求
- 可扩展 :支持自定义指标和通知
- 社区活跃 :持续更新和维护
对于大型分布式系统,建议结合 Prometheus + Grafana 使用,Spring Boot Admin 作为补充,提供应用级别的管理和控制功能。
3. 项目健康检查、指标监控、日志监控
一、健康检查 (Health Checks)
内置健康指示器
# 访问端点 GET /actuator/health # 响应示例 { "status": "UP", "components": { "db": { "status": "UP", "details": { "database": "MySQL", "validationQuery": "isValid()" } }, "diskSpace": { "status": "UP", "details": { "total": 500107862016, "free": 300107862016, "threshold": 10485760 } }, "ping": { "status": "UP" } } }自定义健康检查
/**
 * Example health indicator driven by a custom service check.
 */
@Component
public class CustomHealthIndicator implements HealthIndicator {

    /**
     * Reports the state of the custom service.
     *
     * @return UP with availability details when the check passes, otherwise DOWN with
     *         error details
     */
    @Override
    public Health health() {
        // Run the custom check and pick the matching report builder.
        Health.Builder report = checkService()
            ? Health.up()
                .withDetail("service", "available")
                .withDetail("responseTime", "50ms")
            : Health.down()
                .withDetail("service", "unavailable")
                .withDetail("error", "Connection timeout");
        return report.build();
    }

    /**
     * Placeholder for the real check.
     *
     * @return always {@code true} in this example
     */
    private boolean checkService() {
        // Implement the check logic here.
        return true;
    }
}
数据库健康检查
spring: datasource: hikari: health-check-properties: connectTimeout: 1000 connectionTestQuery: "SELECT 1"二、指标监控 (Metrics)
Micrometer 集成
<!-- 添加 Micrometer 依赖 --> <dependency> <groupId>io.micrometer</groupId> <artifactId>micrometer-registry-prometheus</artifactId> </dependency># 配置 Prometheus management: metrics: export: prometheus: enabled: true tags: application: ${spring.application.name}自定义指标
@Component public class CustomMetrics { private final MeterRegistry meterRegistry; private final Counter requestCounter; private final Timer responseTimer; private final DistributionSummary payloadSize; public CustomMetrics(MeterRegistry meterRegistry) { this.meterRegistry = meterRegistry; // 创建计数器 this.requestCounter = Counter.builder("custom.requests") .description("Total number of requests") .tag("type", "api") .register(meterRegistry); // 创建计时器 this.responseTimer = Timer.builder("custom.response.time") .description("Response time of API") .register(meterRegistry); // 创建分布摘要 this.payloadSize = DistributionSummary.builder("custom.payload.size") .baseUnit("bytes") .register(meterRegistry); } public void recordRequest() { requestCounter.increment(); } public void recordResponseTime(long duration, TimeUnit unit) { responseTimer.record(duration, unit); } public void recordPayloadSize(long size) { payloadSize.record(size); } }JVM 和系统指标
@RestController public class MetricsController { @Autowired private MeterRegistry meterRegistry; @GetMapping("/metrics/jvm") public Map<String, Object> getJvmMetrics() { Map<String, Object> metrics = new HashMap<>(); // 获取 JVM 内存使用情况 metrics.put("heap.used", meterRegistry.get("jvm.memory.used").gauge().value()); metrics.put("heap.max", meterRegistry.get("jvm.memory.max").gauge().value()); // GC 信息 metrics.put("gc.count", meterRegistry.get("jvm.gc.pause").timer().count()); metrics.put("gc.time", meterRegistry.get("jvm.gc.pause").timer().totalTime(TimeUnit.MILLISECONDS)); // 线程信息 metrics.put("threads.live", meterRegistry.get("jvm.threads.live").gauge().value()); metrics.put("threads.daemon", meterRegistry.get("jvm.threads.daemon").gauge().value()); return metrics; } }三、日志监控 (Loggers)
动态日志级别调整
# 访问端点 GET /actuator/loggers # 查看所有日志级别 POST /actuator/loggers/{loggerName} # 修改日志级别 # 请求体示例 { "configuredLevel": "DEBUG" }日志配置类
/**
 * Logback setup plus a small REST API for inspecting and changing logger levels at runtime.
 */
@Configuration
@Slf4j
public class LoggingConfig {

    /**
     * Sets the {@code com.example} logger to DEBUG once the bean has been constructed.
     */
    @PostConstruct
    public void init() {
        // Initialise the logging configuration.
        LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();

        // Create (or fetch) the logger and set its level.
        ch.qos.logback.classic.Logger logger = loggerContext.getLogger("com.example");
        logger.setLevel(Level.DEBUG);
    }

    /**
     * REST endpoints under {@code /api/logging}.
     *
     * <p>Declared {@code static} so the controller does not carry a hidden reference to the
     * enclosing configuration instance.
     */
    @RestController
    @RequestMapping("/api/logging")
    public static class LoggingController {

        /**
         * Changes the level of one logger.
         *
         * @param logger name of the logger to change
         * @param level  level name, e.g. {@code DEBUG} or {@code INFO}
         * @return 200 on success, 400 when {@code level} is not a known level name
         */
        @PostMapping("/level")
        public ResponseEntity<?> changeLogLevel(
                @RequestParam String logger,
                @RequestParam String level) {
            // Parse with an explicit null default: an unknown name is rejected here instead
            // of being silently turned into a fallback level.
            Level requestedLevel = Level.toLevel(level, null);
            if (requestedLevel == null) {
                return ResponseEntity.badRequest().body("Unknown log level: " + level);
            }

            // Obtain the LoggerContext.
            LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();

            // Apply the level.
            ch.qos.logback.classic.Logger logbackLogger = loggerContext.getLogger(logger);
            logbackLogger.setLevel(requestedLevel);

            log.info("Changed log level for {} to {}", logger, level);
            return ResponseEntity.ok().build();
        }

        /**
         * Lists every logger that has an explicitly configured level.
         *
         * @return logger name mapped to its level name
         */
        @GetMapping("/levels")
        public Map<String, String> getLogLevels() {
            Map<String, String> levels = new HashMap<>();
            LoggerContext loggerContext = (LoggerContext) LoggerFactory.getILoggerFactory();

            for (ch.qos.logback.classic.Logger logger : loggerContext.getLoggerList()) {
                // Loggers without their own level are skipped.
                if (logger.getLevel() != null) {
                    levels.put(logger.getName(), logger.getLevel().toString());
                }
            }
            return levels;
        }
    }
}
日志文件监控
management: endpoint: logfile: enabled: true external-file: logs/application.log # 指定日志文件路径@Component public class LogFileMonitor { private static final Logger log = LoggerFactory.getLogger(LogFileMonitor.class); private final String logFilePath = "logs/application.log"; private long lastFileSize = 0; @Scheduled(fixedDelay = 30000) // 每30秒检查一次 public void monitorLogFile() { try { File logFile = new File(logFilePath); if (!logFile.exists()) { log.warn("Log file does not exist: {}", logFilePath); return; } long currentSize = logFile.length(); if (currentSize > lastFileSize) { long newBytes = currentSize - lastFileSize; log.info("Log file has grown by {} bytes", newBytes); // 可以在这里添加报警逻辑 if (newBytes > 1024 * 1024) { // 1MB log.warn("Log file growing too fast!"); } } lastFileSize = currentSize; } catch (Exception e) { log.error("Error monitoring log file", e); } } @EventListener public void handleLogEvent(ApplicationEvent event) { // 监听特定日志事件 if (event instanceof LoggingEvent) { // 处理日志事件 } } }四、集成监控系统
集成 Prometheus + Grafana
# application-prometheus.yml management: metrics: export: prometheus: enabled: true step: 1m distribution: percentiles-histogram: http.server.requests: true percentiles: http.server.requests: 0.5, 0.95, 0.99 web: server: request: autotime: enabled: true集成 ELK Stack
/**
 * Ships application logs to Logstash over TCP.
 */
@Configuration
public class ElkConfig {

    /**
     * Creates the Logstash appender, starts it and attaches it to the root logger.
     *
     * <p>An appender that is only constructed never receives events, so this method also
     * starts it and registers it on the root logger. It is stopped again when the
     * application context closes.
     *
     * @return the started appender named {@code LOGSTASH}
     */
    @Bean(destroyMethod = "stop")
    public LogstashTcpSocketAppender logstashAppender() {
        LoggerContext context = loggerContext();

        LogstashTcpSocketAppender appender = new LogstashTcpSocketAppender();
        appender.setName("LOGSTASH");
        appender.setContext(context);
        appender.addDestination("localhost:5000");

        // Every event carries the application name as a custom field.
        LogstashEncoder encoder = new LogstashEncoder();
        encoder.setCustomFields("{\"app\":\"my-spring-app\"}");
        appender.setEncoder(encoder);

        // Start before attaching so no event reaches an appender that is not started.
        appender.start();
        context.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME).addAppender(appender);

        return appender;
    }

    /**
     * Exposes the Logback logger context as a bean.
     *
     * @return the logger factory cast to {@code LoggerContext}
     */
    @Bean
    public LoggerContext loggerContext() {
        return (LoggerContext) LoggerFactory.getILoggerFactory();
    }
}
五、安全配置
/**
 * Secures the Actuator endpoints with HTTP Basic and a single in-memory user.
 */
@Configuration
@EnableWebSecurity
public class ActuatorSecurityConfig extends WebSecurityConfigurerAdapter {

    /**
     * {@code /actuator/health} and {@code /actuator/info} are public, the remaining
     * {@code /actuator/**} paths require the {@code ADMIN} role, and every other request
     * must be authenticated. CSRF protection is disabled.
     */
    @Override
    protected void configure(HttpSecurity http) throws Exception {
        http
            .authorizeRequests()
            // The public endpoints are listed ahead of the broader /actuator/** rule.
            .antMatchers("/actuator/health", "/actuator/info").permitAll()
            .antMatchers("/actuator/**").hasRole("ADMIN")
            .anyRequest().authenticated()
            .and()
            .httpBasic()
            .and()
            .csrf().disable();
    }

    /**
     * Defines one in-memory user with the {@code ADMIN} role.
     *
     * NOTE(review): the user name and password are hard-coded literals here — acceptable
     * for a demo only; confirm real deployments load credentials from external configuration.
     */
    @Bean
    @Override
    public UserDetailsService userDetailsService() {
        UserDetails user = User.withDefaultPasswordEncoder()
            .username("admin")
            .password("password")
            .roles("ADMIN")
            .build();
        return new InMemoryUserDetailsManager(user);
    }
}
六、自定义端点
/**
 * Example custom Actuator endpoint (id {@code custom}) that stores a list of messages.
 *
 * <p>The endpoint is a single shared bean that concurrent requests may call, so the
 * messages are kept in a thread-safe list and readers receive a snapshot instead of the
 * internal list.
 */
@Component
@Endpoint(id = "custom")
public class CustomEndpoint {

    private final List<String> messages = new java.util.concurrent.CopyOnWriteArrayList<>();

    /**
     * Read operation.
     *
     * @return a copy of the stored messages; changing it does not affect the endpoint
     */
    @ReadOperation
    public List<String> getMessages() {
        return new ArrayList<>(messages);
    }

    /**
     * Write operation; the message is taken from the request path.
     *
     * @param message message to store
     */
    @WriteOperation
    public void addMessage(@Selector String message) {
        messages.add(message);
    }

    /**
     * Delete operation; removes the first stored occurrence of the message, if any.
     *
     * @param message message to remove
     */
    @DeleteOperation
    public void deleteMessage(@Selector String message) {
        messages.remove(message);
    }
}
七、最佳实践建议
- 生产环境配置 :
management:
  endpoints:
    web:
      exposure:
        include: health,info,metrics,prometheus
      base-path: /internal/actuator
  endpoint:
    health:
      show-details: when-authorized
      probes:
        enabled: true
- 健康检查分组 :
management: endpoint: health: group: readiness: include: db,diskSpace liveness: include: ping- 指标标签 :
/**
 * Adds the common tags {@code region} and {@code instance} to every meter.
 *
 * <p>The host name is resolved once, outside the lambda, because
 * {@code InetAddress.getLocalHost()} throws a checked exception that the customizer
 * callback cannot propagate.
 *
 * @return customizer that applies the common tags to the registry
 */
@Bean
MeterRegistryCustomizer<MeterRegistry> metricsCommonTags() {
    final String instanceName = resolveHostName();
    return registry -> registry.config()
        .commonTags("region", "us-east-1", "instance", instanceName);
}

/** Returns the local host name, or {@code "unknown"} when it cannot be resolved. */
private static String resolveHostName() {
    try {
        return InetAddress.getLocalHost().getHostName();
    } catch (java.net.UnknownHostException e) {
        // A missing host name must not stop the application from starting.
        return "unknown";
    }
}
这些配置和代码示例可以帮助你构建完整的 Spring Boot 应用监控体系。