import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.datasets import make_friedman1
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import plot_tree
from sklearn.metrics import mean_squared_error
# CS 307: Week 03
# Simulate noisy sine-wave data.
# Seed NumPy's global random generator so the simulated data is reproducible.
np.random.seed(42)

# number of simulated observations
n = 200

# single feature, drawn uniformly on [-2*pi, 2*pi], stored as an (n, 1) column
X = np.random.uniform(low=-2*np.pi, high=2*np.pi, size=(n, 1))

# response: sine "signal" plus Gaussian noise (mean 0, standard deviation 0.25)
y = np.sin(X) + np.random.normal(loc=0, scale=0.25, size=(n, 1))
# Side-by-side view of the simulated data: the left panel overlays the true
# regression function, the right panel shows only the observed points.

# setup figure
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(10, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# create subplot for "simulation study"
ax1.set_title("Simulation Study")
ax1.scatter(X, y, color="dodgerblue")
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.grid(True, linestyle='--', color='lightgrey')
# add true regression function, the "signal" that we want to learn
ax1.plot(x_plot, np.sin(x_plot), color='black')

# create subplot for "reality"
ax2.set_title("Reality")
ax2.scatter(X, y, color="dodgerblue")
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.grid(True, linestyle='--', color='lightgrey')

# show plot
plt.show()
# Fit a k-nearest neighbors regressor with k=10 to the simulated sine data.
knn010 = KNeighborsRegressor(n_neighbors=10)
knn010.fit(X, y)
# Cell output: KNeighborsRegressor(n_neighbors=10)
# (The interactive HTML representation of the fitted estimator is not rendered in this export;
# rerun the cell in a Jupyter environment, or load the notebook with nbviewer.org, to see it.)
# Fit a decision tree regressor with min_samples_split=100 to the simulated
# sine data.
dt100 = DecisionTreeRegressor(min_samples_split=100)
dt100.fit(X, y)
# Cell output: DecisionTreeRegressor(min_samples_split=100)
# (The interactive HTML representation of the fitted estimator is not rendered in this export;
# rerun the cell in a Jupyter environment, or load the notebook with nbviewer.org, to see it.)
# Compare the fitted KNN and decision tree models: each panel shows the
# simulated data with that model's predictions drawn over a fine grid.

# setup figure
fig, (ax1, ax2) = plt.subplots(1, 2)
fig.set_size_inches(10, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# create subplot for KNN
ax1.set_title("k-Nearest Neighbors, k=10")
ax1.scatter(X, y, color="dodgerblue")
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.grid(True, linestyle='--', color='lightgrey')
ax1.plot(x_plot, knn010.predict(x_plot), color='black')

# create subplot for decision tree
ax2.set_title("Decision Tree, min_samples_split=100")
ax2.scatter(X, y, color="dodgerblue")
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.grid(True, linestyle='--', color='lightgrey')
ax2.plot(x_plot, dt100.predict(x_plot), color='black')

# show plot
plt.show()
# Draw the structure of the fitted dt100 tree on a small, high-resolution figure.
fig, ax = plt.subplots(1, 1)
fig.set_size_inches(3, 3)
fig.set_dpi(200)
# plot_tree is not given an axes argument here, so it relies on its default
# choice of axes.
plot_tree(dt100)
plt.show()
# Fit three decision trees that differ only in min_samples_split.
dt002 = DecisionTreeRegressor(min_samples_split=2)
dt100 = DecisionTreeRegressor(min_samples_split=100)
dt250 = DecisionTreeRegressor(min_samples_split=250)

dt002.fit(X, y)
dt100.fit(X, y)
dt250.fit(X, y)
# Cell output: DecisionTreeRegressor(min_samples_split=250)
# (The interactive HTML representation of the fitted estimator is not rendered in this export;
# rerun the cell in a Jupyter environment, or load the notebook with nbviewer.org, to see it.)
# Compare decision trees fit with min_samples_split of 250, 100, and 2: each
# panel shows the simulated data with that tree's predictions over a fine grid.

# setup figure
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
fig.set_size_inches(15, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# create subplot for decision tree with min_samples_split=250
ax1.set_title("Decision Tree, min_samples_split=250")
ax1.scatter(X, y, color="dodgerblue")
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.grid(True, linestyle='--', color='lightgrey')
ax1.plot(x_plot, dt250.predict(x_plot), color='black')

# create subplot for decision tree with min_samples_split=100
ax2.set_title("Decision Tree, min_samples_split=100")
ax2.scatter(X, y, color="dodgerblue")
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.grid(True, linestyle='--', color='lightgrey')
ax2.plot(x_plot, dt100.predict(x_plot), color='black')

# create subplot for decision tree with min_samples_split=2
ax3.set_title("Decision Tree, min_samples_split=2")
ax3.scatter(X, y, color="dodgerblue")
ax3.set_xlabel("x")
ax3.set_ylabel("y")
ax3.grid(True, linestyle='--', color='lightgrey')
ax3.plot(x_plot, dt002.predict(x_plot), color='black')

# show plot
plt.show()
# Fit three decision trees that differ only in max_depth.
dt_d01 = DecisionTreeRegressor(max_depth=1)
dt_d05 = DecisionTreeRegressor(max_depth=5)
dt_d10 = DecisionTreeRegressor(max_depth=10)

dt_d01.fit(X, y)
dt_d05.fit(X, y)
dt_d10.fit(X, y)
# Cell output: DecisionTreeRegressor(max_depth=10)
# (The interactive HTML representation of the fitted estimator is not rendered in this export;
# rerun the cell in a Jupyter environment, or load the notebook with nbviewer.org, to see it.)
# Compare decision trees fit with max_depth of 1, 5, and 10: each panel shows
# the simulated data with that tree's predictions over a fine grid.

# setup figure
fig, (ax1, ax2, ax3) = plt.subplots(1, 3)
fig.set_size_inches(15, 5)
fig.set_dpi(100)

# add overall title
fig.suptitle('Simulated Sine Wave Data')

# x values to make predictions at for plotting purposes
x_plot = np.linspace(-2*np.pi, 2*np.pi, 1000).reshape((1000, 1))

# create subplot for decision tree with max_depth=1
ax1.set_title("Decision Tree, max_depth=1")
ax1.scatter(X, y, color="dodgerblue")
ax1.set_xlabel("x")
ax1.set_ylabel("y")
ax1.grid(True, linestyle='--', color='lightgrey')
ax1.plot(x_plot, dt_d01.predict(x_plot), color='black')

# create subplot for decision tree with max_depth=5
ax2.set_title("Decision Tree, max_depth=5")
ax2.scatter(X, y, color="dodgerblue")
ax2.set_xlabel("x")
ax2.set_ylabel("y")
ax2.grid(True, linestyle='--', color='lightgrey')
ax2.plot(x_plot, dt_d05.predict(x_plot), color='black')

# create subplot for decision tree with max_depth=10
ax3.set_title("Decision Tree, max_depth=10")
ax3.scatter(X, y, color="dodgerblue")
ax3.set_xlabel("x")
ax3.set_ylabel("y")
ax3.grid(True, linestyle='--', color='lightgrey')
ax3.plot(x_plot, dt_d10.predict(x_plot), color='black')

# show plot
plt.show()
# Generate train and test sets with make_friedman1; the two calls differ only
# in random_state (42 for train, 1 for test).
X_train, y_train = make_friedman1(n_samples=200, n_features=7, random_state=42)
X_test, y_test = make_friedman1(n_samples=200, n_features=7, random_state=1)

# Bare expression: displays the shape in a notebook, has no effect in a script.
X_train.shape
# Cell output: (200, 7)
# Fit a KNN regressor (k=25) and a decision tree (min_samples_split=10) to the
# Friedman #1 training data.
knn = KNeighborsRegressor(n_neighbors=25)
dt = DecisionTreeRegressor(min_samples_split=10)

knn.fit(X_train, y_train)
dt.fit(X_train, y_train)
# Cell output: DecisionTreeRegressor(min_samples_split=10)
# (The interactive HTML representation of the fitted estimator is not rendered in this export;
# rerun the cell in a Jupyter environment, or load the notebook with nbviewer.org, to see it.)
# Predict on the test set with both models.
knn_pred = knn.predict(X_test)
dt_pred = dt.predict(X_test)

# Report test RMSE (square root of the mean squared error): KNN first, then
# the decision tree.
print(np.sqrt(mean_squared_error(y_test, knn_pred)))
print(np.sqrt(mean_squared_error(y_test, dt_pred)))
# Cell output (test RMSE):
# 2.983752505149063  (k-nearest neighbors)
# 2.834171716110265  (decision tree)