内容回顾:数据初步可视化
- 单特征可视化:连续变量箱线图(还说了核密度直方图)、离散特征直方图
- 特征和标签关系可视化
- 箱线图美化--->直方图
作业:去针对其他特征绘制单特征图和特征和标签的关系图,并且试图观察出一些有意思的结论
# Exploratory visualisation of the credit dataset: split the columns into
# numeric and categorical features, draw single-feature plots, then plot
# 'Current Credit Balance' grouped by 'Home Ownership'.
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Configure a CJK-capable font BEFORE the first figure is drawn; the original
# set it after a plot with a Chinese title had already been rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Keep the minus sign renderable when a CJK font is active.
plt.rcParams['axes.unicode_minus'] = False

data = pd.read_csv(r'D:\课程\课程文件1\pythonstudy\pythonx训练营\python-60days-challenge\data.csv')
# A bare `data.head()` only displays as the last expression of a notebook
# cell; print it so the preview is also shown when run as a script.
print(data.head())

# Numeric columns are treated as continuous, everything else as discrete.
# The original appended `object` columns to `continous_features`, i.e. the two
# lists were swapped. The (misspelled) variable name is kept for compatibility.
# NOTE(review): numeric columns such as 'Id' and 'Credit Default' end up in the
# continuous list although they look like an identifier / the target label --
# confirm whether they should be excluded.
continous_features = []
discrete_features = []
for column in data.columns:
    if pd.api.types.is_numeric_dtype(data[column]):
        continous_features.append(column)
    else:
        discrete_features.append(column)
print(continous_features)
print(discrete_features)
# Column lists recorded with the original notebook run, shown here under the
# corrected names:
#   continous_features:
#     ['Id', 'Annual Income', 'Tax Liens', 'Number of Open Accounts',
#      'Years of Credit History', 'Maximum Open Credit',
#      'Number of Credit Problems', 'Months since last delinquent',
#      'Bankruptcies', 'Current Loan Amount', 'Current Credit Balance',
#      'Monthly Debt', 'Credit Score', 'Credit Default']
#   discrete_features:
#     ['Home Ownership', 'Years in current job', 'Purpose', 'Term']

# Single discrete feature: 'Home Ownership' holds category strings, so a box
# plot (quartiles of a numeric variable) is not meaningful; show the count of
# each category instead.
sns.histplot(data['Home Ownership'])
plt.title('Home Ownership 的直方图')
plt.xlabel('Home Ownership')
plt.show()

# Single continuous feature: histogram of the current credit balance.
sns.histplot(data['Current Credit Balance'])
plt.title('当前信用余额 直方图')
plt.xlabel('当前信用余额')
# The original label read "number of employees", a copy-paste leftover.
plt.ylabel('样本数量')
plt.show()

# Relationship between two features, box plot: distribution of the balance
# within each home-ownership category.
plt.figure(figsize=(8, 16))
sns.boxplot(x='Current Credit Balance', y='Home Ownership', data=data)
plt.title('Home Ownership vs. Current Credit Balance')
plt.xlabel('Current Credit Balance')
# The y axis carries the category, not the balance.
plt.ylabel('Home Ownership')
plt.show()

# Same relationship as a violin plot.
plt.figure(figsize=(8, 16))
sns.violinplot(x='Current Credit Balance', y='Home Ownership', data=data)
plt.title('Home Ownership vs. Current Credit Balance')
plt.xlabel('Current Credit Balance')
plt.ylabel('Home Ownership')
plt.show()

# Histogram of 'Current Credit Balance' coloured by 'Home Ownership'.
# kde=True overlays a kernel density estimate per group; element="step" draws
# the histogram as a step outline.
# NOTE(review): the original comment announced a plot against 'Credit Default'
# (which looks like the label column), but the code groups by
# 'Home Ownership'; switch `hue` if the feature-vs-label plot was intended.
plt.figure(figsize=(18, 16))
sns.histplot(x='Current Credit Balance', hue='Home Ownership', data=data,
             kde=True, element="step")
plt.title('Current Credit Balance vs. Home Ownership')
plt.xlabel('Current Credit Balance')
plt.ylabel('Count')
plt.show()

# Credit: @浙大疏锦行