泛化能力:由此及彼的能力,即模型对没有见过的新数据的预测能力。
如果模型遇见新的数据时拟合(预测)效果很差,就说明它的泛化能力弱。
数据
# Data: 100 noisy samples of a quadratic relationship.
import numpy as np
import matplotlib.pyplot as plt

x = np.random.uniform(-3, 3, size=100)
# Recent scikit-learn versions expect the feature data to be a 2-D matrix,
# even when it is only a single row or a single column.
X = x.reshape(-1, 1)
# Quadratic ground truth plus standard-normal noise.
y = 0.5 * x ** 2 + x + 2 + np.random.normal(0, 1, size=100)
plt.scatter(x, y)
# The relationship between x and y is non-linear.
# print(X)
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler


def PolynomialRegression(degree):
    """Build an unfitted polynomial-regression pipeline.

    The pipeline chains three steps: polynomial feature expansion up to
    ``degree``, standardization of the expanded features, and an ordinary
    linear regression on the result.

    Args:
        degree: Degree passed to ``PolynomialFeatures``.

    Returns:
        A ``Pipeline`` with the steps ``'poly'``, ``'std_scaler'`` and
        ``'lin_reg'``; call ``fit`` / ``predict`` on it as on any estimator.
    """
    return Pipeline([
        ('poly', PolynomialFeatures(degree=degree)),
        ('std_scaler', StandardScaler()),
        ('lin_reg', LinearRegression()),
    ])
# Overfitting vs. underfitting.
# Why we need separate training data and test data.
# The purpose of train_test_split: hold out data the model never sees
# during fitting so that its error on new data can be measured.
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split itself reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)
使用测试数据集
线性回归
from sklearn.linear_model import LinearRegression

# Baseline: a plain linear model, fitted on the training split only and
# evaluated by predicting the held-out test split.
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
y_predict = lin_reg.predict(X_test)
from sklearn.metrics import mean_squared_error

# Test-set mean squared error of the linear baseline.
print(mean_squared_error(y_test, y_predict))
3.0558244599225235
多项式回归degree = 2
# Degree-2 polynomial regression: fit on the training split, score on the
# test split.
poly2_reg = PolynomialRegression(degree=2)
poly2_reg.fit(X_train, y_train)
y2_predict = poly2_reg.predict(X_test)
# Arguments follow the (y_true, y_pred) order, matching the linear baseline.
print(mean_squared_error(y_test, y2_predict))
1.0186698710398034
多项式回归degree = 10
# Degree-10 polynomial regression: fit on the training split, score on the
# test split.
# NOTE: the names poly2_reg / y2_predict are reused from the degree-2 cell;
# from here on they refer to the degree-10 model and its predictions.
poly2_reg = PolynomialRegression(degree=10)
poly2_reg.fit(X_train, y_train)
y2_predict = poly2_reg.predict(X_test)
# Arguments follow the (y_true, y_pred) order, matching the linear baseline.
print(mean_squared_error(y_test, y2_predict))
0.9294573452521058
degree = 100
# Degree-100 polynomial regression: fit on the training split, score on the
# test split. The article reports a test MSE on the order of 1e17 here,
# i.e. the model generalizes very badly.
# NOTE: the names poly2_reg / y2_predict are reused from the earlier cells;
# from here on they refer to the degree-100 model and its predictions.
poly2_reg = PolynomialRegression(degree=100)
poly2_reg.fit(X_train, y_train)
y2_predict = poly2_reg.predict(X_test)
# Arguments follow the (y_true, y_pred) order, matching the linear baseline.
print(mean_squared_error(y_test, y2_predict))
degree = 100 时,测试数据集上的误差非常大,说明模型过拟合,泛化能力非常弱!
5.5125601832506963e+17
训练,测试数据集分离
什么是过拟合?例如模型学到了"毛发是黄颜色的就是狗"(其实狗还有别的颜色,毛色并不是狗的一般特征,模型找的这个特征太细节了)。
把训练数据集和测试数据集分离,可以用来测评模型的泛化能力,但还有更好的办法。
如果觉得《Python机器学习:多项式回归与模型泛化004为什么需要训练数据集和测试数据集》对你有帮助,请点赞、收藏,并留下你的观点哦!