In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [2]:
# Load the position/salary table; the path is relative to the current working directory.
df = pd.read_csv('./Polynomial_Regression/Position_Salaries.csv')
# Call-style print behaves the same on Python 2 and Python 3
# (the bare `print x` statement is a SyntaxError on Python 3).
print(df.shape)
# Last expression of the cell: preview of the first rows.
df.head()
(10, 3)
Out[2]:
Position Level Salary
0 Business Analyst 1 45000
1 Junior Consultant 2 50000
2 Senior Consultant 3 60000
3 Manager 4 80000
4 Country Manager 5 110000
In [14]:
# Raw matplotlib scatter of column 1 (x axis) against column 2 (y axis),
# drawn with small, semi-transparent red markers.
plt.scatter(x=df.iloc[:, 1], y=df.iloc[:, 2], s=12, c='r', alpha=0.7)
Out[14]:
<matplotlib.collections.PathCollection at 0x7fa546e2a3d0>
In [23]:
# Same scatter through the pandas plotting accessor; columns are selected by name
# after dropping the first column of the frame.
df.iloc[:, 1:].plot.scatter(x='Level', y='Salary')
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa546cc6510>
In [27]:
# Green variant of the Level/Salary scatter, spelled with the generic
# `plot(kind=...)` entry point instead of the `.plot.scatter` shortcut.
df.iloc[:, 1:].plot(kind='scatter', x='Level', y='Salary', c='g')
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa546b02810>

In [29]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
In [51]:
# Cubic feature expander; the remaining constructor options keep their defaults.
pl = PolynomialFeatures(degree=3)
# Echo the estimator so its full parameter set is shown as the cell output.
pl
Out[51]:
PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)
In [52]:
# First three rows of column 1; the 1:2 column range (rather than a bare index)
# keeps the result a one-column DataFrame instead of a Series.
df.iloc[:3, 1:2]
Out[52]:
Level
0 1
1 2
2 3
In [53]:
# First five rows of column 2, again sliced as a column range so the
# result stays a one-column DataFrame.
df.iloc[:5, 2:3]
Out[53]:
Salary
0 45000
1 50000
2 60000
3 80000
4 110000
In [66]:
# Feature matrix and target as NumPy arrays. Both are taken with column *ranges*
# so each one stays two-dimensional, i.e. shape (n_rows, 1).
X = df.iloc[:, 1:2].values
y = df.iloc[:, 2:3].values
# Show both shapes as the cell output.
(X.shape, y.shape)
Out[66]:
((10, 1), (10, 1))
In [67]:
# Fit the expander on X, then expand X itself: one output column per power
# of the single input feature, from the constant term up to the cube.
pl.fit(X).transform(X)
Out[67]:
array([[    1.,     1.,     1.,     1.],
       [    1.,     2.,     4.,     8.],
       [    1.,     3.,     9.,    27.],
       [    1.,     4.,    16.,    64.],
       [    1.,     5.,    25.,   125.],
       [    1.,     6.,    36.,   216.],
       [    1.,     7.,    49.,   343.],
       [    1.,     8.,    64.,   512.],
       [    1.,     9.,    81.,   729.],
       [    1.,    10.,   100.,  1000.]])
In [90]:
def _poly_predict(degree):
    """Fit a linear regression on degree-`degree` polynomial features of X
    (target y) and return its predictions at the same X."""
    expander = PolynomialFeatures(degree)
    expanded = expander.fit_transform(X)
    return LinearRegression().fit(expanded, y).predict(expanded)


plt.figure(figsize=(12, 7))
plt.scatter(X, y)
# Plain linear fit in red, followed by polynomial fits of degree 2, 3 and 4.
plt.plot(X, LinearRegression().fit(X, y).predict(X), c='r')
plt.plot(X, _poly_predict(2), c='g')
plt.plot(X, _poly_predict(3), c='orange')
plt.plot(X, _poly_predict(4), c='purple')
Out[90]:
[<matplotlib.lines.Line2D at 0x7fa537b9f550>]
In [103]:
# Dense grid over the observed feature range so the fitted curves render smoothly.
# np.linspace includes BOTH endpoints; the previous np.arange(min, max, 0.1) was
# half-open and stopped one step short of the largest observation, so every curve
# ended before the last data point. X.min()/X.max() also yield true scalars,
# whereas the builtin min()/max() on a 2-D array return 1-element arrays.
grid_step = 0.1
x_lo, x_hi = X.min(), X.max()
n_points = int(round((x_hi - x_lo) / grid_step)) + 1
# Column vector of shape (n_points, 1), the layout the estimators are fitted with.
X_grid = np.linspace(x_lo, x_hi, n_points).reshape(-1, 1)

plt.figure(figsize=(12, 7))
plt.xlabel('Level')
plt.ylabel('Salary')
plt.scatter(X, y)
# Each model is fitted on the observed samples (X, y) and evaluated on the dense grid:
# plain linear fit in red, then polynomial fits of degree 2, 3 and 4.
plt.plot(X_grid, LinearRegression().fit(X, y).predict(X_grid), c='r')
plt.plot(X_grid, LinearRegression().fit(PolynomialFeatures(2).fit_transform(X), y).predict(PolynomialFeatures(2).fit_transform(X_grid)), c='g')
plt.plot(X_grid, LinearRegression().fit(PolynomialFeatures(3).fit_transform(X), y).predict(PolynomialFeatures(3).fit_transform(X_grid)), c='orange')
plt.plot(X_grid, LinearRegression().fit(PolynomialFeatures(4).fit_transform(X), y).predict(PolynomialFeatures(4).fit_transform(X_grid)), c='purple')
Out[103]:
[<matplotlib.lines.Line2D at 0x7fa537a32210>]