notebook.community

Edit and run



In [3]:

    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt



In [4]:

    
%matplotlib inline



In [5]:

    
# Load the data set from the CSV file into a DataFrame.
dataset = pd.read_csv('Salary_Data.csv')



In [7]:

    
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
dataset.describe()









    Out[7]:







  
    
      
      YearsExperience
      Salary
    
  
  
    
      count
      30.000000
      30.000000
    
    
      mean
      5.313333
      76003.000000
    
    
      std
      2.837888
      27414.429785
    
    
      min
      1.100000
      37731.000000
    
    
      25%
      3.200000
      56720.750000
    
    
      50%
      4.700000
      65237.000000
    
    
      75%
      7.700000
      100544.750000
    
    
      max
      10.500000
      122391.000000



In [7]:

    
# Features: every column except the last one, kept 2-D (one column per feature).
feature_columns = dataset.iloc[:, :-1]
# Target: the column at position 1, selected as a single 1-D series.
target_column = dataset.iloc[:, 1]

# Convert both selections to plain NumPy arrays for scikit-learn.
X = feature_columns.values
y = target_column.values



In [8]:

    
# Display the feature array (every column of the dataset except the last).
X









    Out[8]:





array([[ 1.1],
       [ 1.3],
       [ 1.5],
       [ 2. ],
       [ 2.2],
       [ 2.9],
       [ 3. ],
       [ 3.2],
       [ 3.2],
       [ 3.7],
       [ 3.9],
       [ 4. ],
       [ 4. ],
       [ 4.1],
       [ 4.5],
       [ 4.9],
       [ 5.1],
       [ 5.3],
       [ 5.9],
       [ 6. ],
       [ 6.8],
       [ 7.1],
       [ 7.9],
       [ 8.2],
       [ 8.7],
       [ 9. ],
       [ 9.5],
       [ 9.6],
       [10.3],
       [10.5]])



In [9]:

    
# Display the target array (the column at position 1 of the dataset).
y









    Out[9]:





array([ 39343.,  46205.,  37731.,  43525.,  39891.,  56642.,  60150.,
        54445.,  64445.,  57189.,  63218.,  55794.,  56957.,  57081.,
        61111.,  67938.,  66029.,  83088.,  81363.,  93940.,  91738.,
        98273., 101302., 113812., 109431., 105582., 116969., 112635.,
       122391., 121872.])



In [13]:

    
from sklearn.model_selection import train_test_split

# Hold out one third of the rows as the test set; random_state is fixed so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    random_state=0,
)



In [14]:

    
from sklearn.linear_model import LinearRegression

# Build the linear model and fit it on the training split in one step;
# fit() hands back the estimator itself, so `regressor` is the fitted model.
regressor = LinearRegression().fit(X_train, y_train)

# Show the fitted estimator (its repr lists the parameters in use).
regressor









    Out[14]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)



In [15]:

    
# Predict the target for each row of the held-out test features.
y_pred = regressor.predict(X_test)



In [16]:

    
# Display the predictions made for the test features.
y_pred









    Out[16]:





array([ 40835.10590871, 123079.39940819,  65134.55626083,  63265.36777221,
       115602.64545369, 108125.8914992 , 116537.23969801,  64199.96201652,
        76349.68719258, 100649.1375447 ])



In [17]:

    
# Display the actual target values of the test split.
y_test









    Out[17]:





array([ 37731., 122391.,  57081.,  63218., 116969., 109431., 112635.,
        55794.,  83088., 101302.])



In [19]:

    
# Visualising the training set results.
# Red points are the observed training samples; the blue line is the model's
# prediction evaluated at those same training inputs.
plt.scatter(X_train, y_train, color='red')
# Colour set explicitly (and the stray trailing comma removed) so the fitted
# line is drawn the same way as in the test-set figure.
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience(Train set)')
plt.xlabel('Experience in years')
plt.ylabel('Salary')
# Render the figure explicitly so the cell's output is the plot rather than
# the repr of the last call (the Text object returned by plt.ylabel).
plt.show()









    Out[19]:





Text(0, 0.5, 'Salary')



In [22]:

    
# Visualising the test set results.
# The regression line is still drawn from the training inputs: the fitted
# model is the same, only the scattered points change to the test split.
fitted_on_train = regressor.predict(X_train)

plt.scatter(X_test, y_test, color='red')
plt.plot(X_train, fitted_on_train, color='blue')

plt.title('Salary vs experience(Test set)')
plt.xlabel('Experience in years')
plt.ylabel('Salary')

plt.show()



In [ ]:

	YearsExperience	Salary
count	30.000000	30.000000
mean	5.313333	76003.000000
std	2.837888	27414.429785
min	1.100000	37731.000000
25%	3.200000	56720.750000
50%	4.700000	65237.000000
75%	7.700000	100544.750000
max	10.500000	122391.000000