标签 离群值 下的文章

本文参考《Google机器学习速成课程》中“使用TF的基本步骤”一节。

尝试合成特征

# Synthetic feature: element-wise ratio of total_rooms to population.
california_housing_dataframe["rooms_per_person"] = (
    california_housing_dataframe["total_rooms"].div(
        california_housing_dataframe["population"]
    )
)

# Train on the synthetic feature alone; the result is kept so its
# "predictions" and "targets" columns can be inspected afterwards.
calibration_data = train_model(
    learning_rate=0.00005,
    steps=500,
    batch_size=5,
    input_feature="rooms_per_person",
)
# Recorded output of the last training period -- period 09 : 237.29
# (presumably the RMSE reported by train_model; confirm against its definition).

处理离群值

# Identify outliers.
plt.figure(figsize=(15, 6))

# Left panel: scatter plot of the model's predictions against the targets.
plt.subplot(1, 2, 1)
plt.scatter(calibration_data["predictions"], calibration_data["targets"])

# Right panel: histogram of rooms_per_person, which reveals a small number
# of outliers.
plt.subplot(1, 2, 2)
_ = california_housing_dataframe["rooms_per_person"].hist()

# Clip the outliers: cap rooms_per_person at 5. Series.clip is the vectorized
# equivalent of applying min(x, 5) to every element.
california_housing_dataframe["rooms_per_person"] = (
    california_housing_dataframe["rooms_per_person"].clip(upper=5)
)
_ = california_housing_dataframe["rooms_per_person"].hist()
# Recorded result after clipping -- period 09 : 108.23
# (presumably from re-running train_model on the clipped feature; that call
# is not shown in this snippet).