In [1]:
%matplotlib inline
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import csv

x = []
y = []

with open('data.csv','r') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        x.append(float(row[0]))
        y.append(float(row[1]) + 23)
        
plt.plot(x,y, label='Length')
plt.xlabel('kg')
plt.ylabel('cm')
plt.title('ToD')
plt.legend()


plt.show()



In [13]:
# Create test and training sets.

import pandas as pd
data = pd.read_csv("data.csv", header=None, names=['kg', 'cm'])
labels = data['kg']
train1 = data.drop(['kg'], axis=1)

from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(train1, labels)

In [3]:
# Guess kg based on cm.

from sklearn.linear_model import LinearRegression
reg = LinearRegression()

reg.fit(x_train, y_train)

reg.score(x_test, y_test)
print(reg.predict(260))


[50.26025693]

In [5]:
# Guess kg based on cm using a different method.

from sklearn import ensemble

clf = ensemble.GradientBoostingRegressor(n_estimators=400, max_depth=5, min_samples_split=2, learning_rate=0.1, loss='ls')

clf.fit(x_train, y_train)
clf.score(x_test, y_test)
print(clf.predict(260))


[155.00065195]
/home/mike/anaconda3/lib/python3.7/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d

In [18]:
# Guess cm based on kg.

import pandas as pd
data = pd.read_csv("data.csv", header=None, names=['kg', 'cm'])
labels = data['cm']
train1 = data.drop(['cm'], axis=1)

from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(train1, labels, test_size=0.10)

from sklearn.linear_model import LinearRegression
reg = LinearRegression()

reg.fit(x_train, y_train)

reg.score(x_test, y_test)
print(reg.predict(145))


[-14.35926944]

In [20]:
# when the regression line is linear (y = ax + b)
# the regression coefficient is the constant (a) that represents the rate of change of one variable (y)
# as a function of changes in the other (x)
print(reg.coef_)

# The intercept (often labeled the constant) is the expected mean value of Y when all X=0.
print(reg.intercept_)


[-2.89221563]
405.0119962525174

In [16]:
data = pd.read_csv('data.csv', header=None, names=['kg', 'cm'])
labels = data['cm']
train1 = data.drop(['cm'], axis=1) #This is similar to selecting the kg column

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(train1, labels, test_size=0.10, random_state=2)

from sklearn.linear_model import LinearRegression

reg = LinearRegression()
reg.fit(x_train, y_train)
reg.score(x_test, y_test)

import numpy as np
reg.predict(np.array([80]).reshape(-1, 1))


Out[16]:
array([172.65013306])