In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
In [4]:
# IPython magic: selects matplotlib's inline backend for this notebook session.
%matplotlib inline
In [5]:
# Read the salary CSV (path is relative to the notebook's working directory)
# into a pandas DataFrame.
dataset = pd.read_csv('Salary_Data.csv')
In [7]:
# Show pandas' summary statistics for the loaded DataFrame as the cell output.
dataset.describe()
Out[7]:
In [7]:
# Split the table into a feature matrix and a target vector.
# X keeps every column except the last one; slicing both axes keeps it 2-D,
# which is the shape the estimator's fit/predict calls below are given.
X = dataset.iloc[:, :-1].values
# y is the last column, i.e. exactly the column X leaves out. The previous
# hard-coded index 1 only agreed with X's ":-1" slice for a two-column file.
y = dataset.iloc[:, -1].values
In [8]:
# Display the feature array as the cell output.
X
Out[8]:
In [9]:
# Display the target array as the cell output.
y
Out[9]:
In [13]:
# Hold out one third of the rows for testing. random_state=0 pins the shuffle
# so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1/3, random_state=0
)
In [14]:
# Create a linear regression model and fit it on the training split only.
regressor = LinearRegression()
# fit() is left as the last expression so the fitted estimator is echoed as
# the cell output.
regressor.fit(X_train, y_train)
Out[14]:
In [15]:
# Predict the target for the held-out test features with the fitted model.
y_pred = regressor.predict(X_test)
In [16]:
# Display the model's predictions for the test set.
y_pred
Out[16]:
In [17]:
# Display the actual test-set targets for comparison with the predictions.
y_test
Out[17]:
In [19]:
# Visualising the training set results
# Red points: the observed training samples.
plt.scatter(X_train, y_train, color='red')
# Fitted regression line, drawn in blue explicitly so it matches the test-set
# figure (the original call had no colour and a stray trailing comma).
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience(Train set)')
plt.xlabel('Experience in years')
plt.ylabel('Salary')
# Render the figure explicitly; without this the cell echoes the Text object
# returned by plt.ylabel as its output.
plt.show()
Out[19]:
In [22]:
# Plot the held-out test samples against the regression line.
# The line is evaluated on the training features, so it is the same fitted
# line that is drawn for the training set.
fitted_line = regressor.predict(X_train)

plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, fitted_line, color='blue')

plt.title('Salary vs experience(Test set)')
plt.xlabel('Experience in years')
plt.ylabel('Salary')

plt.show()
In [ ]: