Notes and Homework
Workflow for using the bayes_opt library
1. Define the objective function
# Imports needed throughout (added here so every step below runs as-is)
import time

import numpy as np
import matplotlib.pyplot as plt
from bayes_opt import BayesianOptimization
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

def knn_cv_score(n_neighbors, weights, metric):
    """
    Take KNN hyperparameters, return the mean cross-validated accuracy.

    Parameters:
    - n_neighbors: number of neighbors (integer; the optimizer passes floats, converted below)
    - weights: weighting scheme (0 = uniform, 1 = distance)
    - metric: distance metric (0 = euclidean, 1 = manhattan, 2 = chebyshev)
    """
    # Bayesian optimization passes floats by default; convert to the types KNN requires
    n_neighbors = int(round(n_neighbors))  # the number of neighbors must be an integer
    weights = "uniform" if weights < 0.5 else "distance"  # map the interval to a category
    metric_map = {0: "euclidean", 1: "manhattan", 2: "chebyshev"}
    metric = metric_map[int(round(metric))]

    # Build the model
    knn = KNeighborsClassifier(
        n_neighbors=n_neighbors,
        weights=weights,
        metric=metric,
        n_jobs=-1  # parallel computation for speed
    )

    # 5-fold cross-validation; return the mean accuracy
    # (X_train, y_train are assumed to be defined beforehand)
    cv_scores = cross_val_score(knn, X_train, y_train, cv=5, scoring="accuracy")
    return cv_scores.mean()
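Before handing knn_cv_score to the optimizer, it is worth calling it once by hand to confirm the float-to-parameter decoding works. A minimal smoke-test sketch, using the iris dataset as a hypothetical stand-in for the real data (this train/test split is illustrative, not from the original notes):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Hypothetical stand-in data so the objective can be tested in isolation;
# knn_cv_score reads X_train/y_train from the enclosing scope.
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Floats, exactly as the optimizer would pass them:
# 12.7 -> n_neighbors=13, 0.3 -> weights="uniform", 1.6 -> metric="chebyshev"
print(knn_cv_score(n_neighbors=12.7, weights=0.3, metric=1.6))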
2. Define the parameter search space

# Note: Bayesian optimization only supports continuous parameters,
# so categorical parameters must be mapped onto numeric intervals.
pbounds = {
    "n_neighbors": (3, 30),  # number of neighbors: 3 to 30
    "weights": (0, 1),       # 0 → uniform, 1 → distance
    "metric": (0, 2)         # 0 → euclidean, 1 → manhattan, 2 → chebyshev
}

for param, (low, high) in pbounds.items():  # items() yields the dict's key/value pairs
    range_size = high - low
    print(f"  {param:20s}: [{low:7.1f}, {high:7.1f}] (range: {range_size:7.1f})")
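One subtlety of this encoding: int(round(x)) does not split the (0, 2) interval evenly, so under uniform sampling "manhattan" receives roughly twice the probability mass of the two endpoint metrics. A small sketch that makes the decoding visible (the sample values are arbitrary):

metric_map = {0: "euclidean", 1: "manhattan", 2: "chebyshev"}

# Roughly: values below 0.5 decode to euclidean, 0.5-1.5 to manhattan,
# above 1.5 to chebyshev, so manhattan covers about half the interval.
for raw in [0.0, 0.3, 0.7, 1.0, 1.3, 1.7, 2.0]:
    print(f"{raw:.1f} -> {metric_map[int(round(raw))]}")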
3. Create the Bayesian optimizer (the optimization loop itself is encapsulated by this object)

# Initialize the optimizer (verbose=2 prints the iteration progress)
optimizer = BayesianOptimization(
    f=knn_cv_score,
    pbounds=pbounds,
    random_state=42,
    verbose=2
)

start_time = time.time()
# Run the optimization (init_points: number of initial random probes; n_iter: number of guided iterations)
# More init_points means broader initial exploration; more n_iter means finer optimization.
optimizer.maximize(init_points=5, n_iter=20)
end_time = time.time()

print(f"Optimization finished! Total time: {end_time - start_time:.2f} s".center(80))
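After the run, optimizer.max holds the best result as a dict with 'target' (the best score) and 'params' (the raw floats), so the decoding from step 1 has to be repeated to build the final model. A sketch of that step:

best = optimizer.max
print(f"Best CV accuracy: {best['target']:.4f}")

# Decode the raw floats back into the types KNeighborsClassifier expects
p = best['params']
best_knn = KNeighborsClassifier(
    n_neighbors=int(round(p['n_neighbors'])),
    weights="uniform" if p['weights'] < 0.5 else "distance",
    metric={0: "euclidean", 1: "manhattan", 2: "chebyshev"}[int(round(p['metric']))],
    n_jobs=-1
)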
4. Extract the results of every iteration

iterations = []
scores = []
for i, res in enumerate(optimizer.res):  # optimizer.res holds each iteration's result; the index starts at 0
    iterations.append(i + 1)             # number iterations from 1
    scores.append(res['target'])         # extract the score
5. Compute the running best score

best_scores = []
current_best = -np.inf  # initialize to negative infinity
for score in scores:
    if score > current_best:  # does the current score beat the best so far?
        current_best = score
    best_scores.append(current_best)
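Steps 4 and 5 can also be collapsed into a few vectorized lines: np.maximum.accumulate computes the same running maximum without an explicit loop. An equivalent sketch:

scores = np.array([res['target'] for res in optimizer.res])
iterations = np.arange(1, len(scores) + 1)
best_scores = np.maximum.accumulate(scores)  # running maximum, same result as the loop above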
6. Plot the optimization trajectory

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: score at each iteration
ax1.plot(iterations, scores, 'o-', label='Score per iteration', alpha=0.7, markersize=6)
ax1.plot(iterations, best_scores, 'r--', label='Running best score', linewidth=2)
ax1.axhline(y=optimizer.max['target'], color='green', linestyle=':',
            label=f"Final best: {optimizer.max['target']:.4f}")  # axhline draws a horizontal line
ax1.set_xlabel('Iteration', fontsize=12)
ax1.set_ylabel('Accuracy', fontsize=12)
ax1.set_title('Bayesian optimization convergence curve', fontsize=14, fontweight='bold')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Right panel: initial random exploration vs. Bayesian optimization
init_points = 5  # must match the init_points passed to maximize() above
ax2.plot(iterations[:init_points], scores[:init_points], 'bo-',
         label=f'Random exploration (first {init_points})', markersize=8, alpha=0.7)
ax2.plot(iterations[init_points:], scores[init_points:], 'go-',
         label=f'Bayesian optimization (last {len(iterations) - init_points})',
         markersize=8, alpha=0.7)
ax2.axvline(x=init_points, color='red', linestyle='--', alpha=0.5,
            label='exploration → exploitation')  # axvline draws a vertical line
ax2.set_xlabel('Iteration', fontsize=12)
ax2.set_ylabel('Accuracy', fontsize=12)
ax2.set_title('Exploration phase vs. exploitation phase', fontsize=14, fontweight='bold')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
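As a final check, the tuned model can be refit on the full training set and scored on held-out data. A minimal sketch, assuming best_knn is the decoded model from the step-3 sketch above and X_test/y_test come from the same train/test split as X_train/y_train:

best_knn.fit(X_train, y_train)
test_acc = best_knn.score(X_test, y_test)  # accuracy on held-out data
print(f"Test accuracy: {test_acc:.4f}")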