import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Load the position/salary table from the course data folder.
df = pd.read_csv('./Polynomial_Regression/Position_Salaries.csv')
# print() call form: the Python 2 statement form is a SyntaxError on Python 3,
# while this spelling prints the same text under both.
print(df.shape)
df.head()

# Scatter plots of the data through three different APIs.
# NOTE(review): the first plot selects columns by position (1 and 2) while the
# other two select 'Level' and 'Salary' by name -- presumably the same
# columns; confirm against the CSV header.
plt.scatter(df.iloc[:, 1], df.iloc[:, 2], s=12, c='r', alpha=.7)
df.iloc[:, 1:].plot(kind='scatter', y='Salary', x='Level')
df.iloc[:, 1:].plot.scatter(x='Level', y='Salary', c='g')
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
# Degree-3 polynomial feature expander; the bare expression below only
# displays its repr when run interactively.
pl = PolynomialFeatures(degree=3)
pl

# Peek at the two single-column slices that become the model input and target.
df.iloc[:, 1:2].head(3)
df.iloc[:, 2:3].head()

# Width-1 column slices (1:2, 2:3) keep both arrays two-dimensional.
X = df.iloc[:, 1:2].values
y = df.iloc[:, 2:3].values
X.shape, y.shape

# Expand X into its polynomial terms (result is only displayed, not stored).
pl.fit_transform(X)
# Figure 1: the raw data plus four regression fits, each evaluated at the
# training points themselves.
plt.figure(figsize=(12, 7))
plt.scatter(X, y)

# Plain linear fit on the untransformed feature.
plt.plot(X, LinearRegression().fit(X, y).predict(X), c='r')

# Polynomial fits of increasing degree. The expanded feature matrix is built
# once per degree and reused for both fitting and prediction.
for degree, color in ((2, 'g'), (3, 'orange'), (4, 'purple')):
    X_poly = PolynomialFeatures(degree).fit_transform(X)
    plt.plot(X, LinearRegression().fit(X_poly, y).predict(X_poly), c=color)
# Evaluation grid: values from X's minimum up to (not including) its maximum
# in steps of 0.1, shaped as a single column.
# X.min()/X.max() give scalars; the builtin min()/max() iterate over the rows
# of the 2-D X and hand np.arange 1-element arrays, an implicit
# array-to-scalar conversion that recent NumPy releases deprecate.
X_grid = np.arange(X.min(), X.max(), 0.1).reshape(-1, 1)

# Figure 2: the raw data plus the same four fits, drawn on the finer grid.
plt.figure(figsize=(12, 7))
plt.scatter(X, y)

# Plain linear fit on the untransformed feature.
plt.plot(X_grid, LinearRegression().fit(X, y).predict(X_grid), c='r')

# Polynomial fits: fit the expander and the regressor on the training data,
# then reuse that same fitted expander to transform the grid.
for degree, color in ((2, 'g'), (3, 'orange'), (4, 'purple')):
    poly = PolynomialFeatures(degree)
    model = LinearRegression().fit(poly.fit_transform(X), y)
    plt.plot(X_grid, model.predict(poly.transform(X_grid)), c=color)