import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
# CS 307: Week 04
# Simulate noisy sine-wave data: y = sin(x) + Gaussian noise.
# No random seed is set in this block, so each run draws a different sample.
n = 200  # number of simulated observations

# feature: n draws, uniform on [-2*pi, 2*pi], stored as an (n, 1) column
X = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=(n, 1))

# response: true signal sin(X) plus N(0, 0.25^2) noise, also (n, 1)
y = np.sin(X) + np.random.normal(loc=0, scale=0.25, size=(n, 1))
# setup figure: two side-by-side panels, 10x5 inches at 100 dpi
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(10, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# both panels show the same simulated data with identical styling
for ax, title in ((ax1, "Simulation Study"), (ax2, "Reality")):
    ax.set_title(title)
    ax.scatter(X, y, color="dodgerblue")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.grid(True, linestyle='--', color='lightgrey')

# add true regression function, the "signal" that we want to learn;
# it is drawn only on the "simulation study" panel, the "reality"
# panel shows the data alone
ax1.plot(x_plot, np.sin(x_plot), color='black')

# show plot
plt.show()
# KNN regressors with k = 100, 10 and 1 neighbors
knn100 = KNeighborsRegressor(n_neighbors=100)
knn010 = KNeighborsRegressor(n_neighbors=10)
knn001 = KNeighborsRegressor(n_neighbors=1)

# fit every model to the same simulated training data
knn100.fit(X, y)
knn010.fit(X, y)
knn001.fit(X, y)
# Cell output: KNeighborsRegressor(n_neighbors=1)
# In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
# On GitHub, the HTML representation is unable to render; please try loading this page with nbviewer.org.
# setup figure: three side-by-side panels, 15x5 inches at 100 dpi
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
fig.set_size_inches(15, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# one panel per fitted KNN model, ordered from largest k to smallest
knn_panels = (
    (ax1, knn100, "KNN, k = 100"),
    (ax2, knn010, "KNN, k = 10"),
    (ax3, knn001, "KNN, k = 1"),
)
for ax, model, title in knn_panels:
    ax.set_title(title)
    ax.scatter(X, y, color="dodgerblue")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.grid(True, linestyle='--', color='lightgrey')
    # true regression function in black, model predictions in red
    ax.plot(x_plot, np.sin(x_plot), color='black')
    ax.plot(x_plot, model.predict(x_plot), color='red')

# show plot
plt.show()
# decision tree regressors with maximum depth 1, 5 and 10
dt01 = DecisionTreeRegressor(max_depth=1)
dt05 = DecisionTreeRegressor(max_depth=5)
dt10 = DecisionTreeRegressor(max_depth=10)

# fit every model to the same simulated training data
dt01.fit(X, y)
dt05.fit(X, y)
dt10.fit(X, y)
# Cell output: DecisionTreeRegressor(max_depth=10)
# In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
# On GitHub, the HTML representation is unable to render; please try loading this page with nbviewer.org.
# setup figure: three side-by-side panels, 15x5 inches at 100 dpi
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
fig.set_size_inches(15, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# one panel per fitted decision tree, ordered from shallowest to deepest
dt_panels = (
    (ax1, dt01, "DT, max_depth = 1"),
    (ax2, dt05, "DT, max_depth = 05"),
    (ax3, dt10, "DT, max_depth = 10"),
)
for ax, model, title in dt_panels:
    ax.set_title(title)
    ax.scatter(X, y, color="dodgerblue")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    ax.grid(True, linestyle='--', color='lightgrey')
    # true regression function in black, model predictions in red
    ax.plot(x_plot, np.sin(x_plot), color='black')
    ax.plot(x_plot, model.predict(x_plot), color='red')

# show plot
plt.show()
# train RMSE goes down as flexibility goes up
for model in (dt01, dt05, dt10):
    # Reshape the predictions into a column so the subtraction against the
    # (n, 1) response is element-wise; -1 lets NumPy infer the row count
    # instead of hard-coding the sample size.
    residuals = model.predict(X).reshape(-1, 1) - y
    print(np.sqrt(np.mean(residuals ** 2)))
# Cell output from the recorded run (values vary between runs):
# 0.5922964673600705
# 0.2321060292671149
# 0.06085014708879982