Mouse Tracking



In [1]:
import json
import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt
from math import copysign as sgn
%matplotlib inline

In [2]:
# data manipulation functions
def number_of_tests():
    return len(os.listdir('tests'))

def clear_tests():
    for i in os.listdir('tests'):
        os.remove('tests/{0}'.format(i))

def load_all_tests(num):
    test_name = 1
    for root, dirs, files in os.walk('all_tests/test_{0}'.format(num)):
        for file in files:
            if file != '1.test':
                shutil.copyfile('{0}/{1}'.format(root, file), 'tests/{0}.test'.format(test_name))
                test_name += 1

def load_test(num):
    jsObj = open("tests/{0}.test".format(num), "r")
    testObj = json.load(jsObj)
    jsObj.close()
    return testObj

In [3]:
# parameters extracting

# -------- Functions for static experiments
def distance(x1, y1, x2, y2):
    return ((x1-x2)**2 + (y1-y2)**2)**(1/2)

def velocity(arr, num):
    try:
        return distance(arr[num]['x'], arr[num]['y'], arr[num-1]['x'], arr[num-1]['y']) / (arr[num]['time'] - arr[num-1]['time'])
    except:
        return 0

def first_distance(obj):
    return distance(obj['ballX'], obj['ballY'], obj['data'][0]['x'], obj['data'][0]['y'])

def searching_delay(obj):
    garanted_find = min(20, len(obj['data'])-1)
    barrier_velocity = 0.5 #px/msec
    for i in range(garanted_find):
        if velocity(obj['data'], i+1) > barrier_velocity:
            return obj['data'][i+1]['time']
    return obj['data'][garanted_find]['time']

def all_distance(obj):
    result = 0
    for i in range(1, len(obj['data'])):
        result += distance(obj['data'][i]['x'], obj['data'][i]['y'], obj['data'][i-1]['x'], obj['data'][i-1]['y'])
    return result

def all_time(obj):
    return obj['data'][-1]['time']

def average_velocity(obj):
    return all_distance(obj) / all_time(obj)

def max_deviation(obj):
    maximum = 0
    
    x1, y1 = (obj['data'][0]['x'], obj['data'][0]['y'])
    x2, y2 = (obj['ballX'], obj['ballY'])
    
    A = (y2-y1)/(x1*y2-x2*y1)
    B = (x1-x2)/(x1*y2-x2*y1)
    H = lambda v: abs(A*v[0]+B*v[1]-1)/( (A**2 + B**2)**(1/2) ) #x = v[0], y = v[1]
    
    for i in obj['data']:
        maximum = max(maximum, H([i['x'], i['y']]))
    
    return maximum

def inertion(obj): # length, time
    x0, y0, R = (obj['ballX'], obj['ballY'], obj['ballSize'])
    max_len = 0
    in_ball = lambda x, y: (x-x0)**2+(y-y0)**2 < max(R, 50)**2
    
    first_intersect = -1
    for i in range(len(obj['data'])):
        if in_ball(obj['data'][i]['x'], obj['data'][i]['y']):
            first_intersect = i
            break
    if first_intersect < 0:
        return (0, 0)
    
    point = first_intersect # первое пересечение с шариком
    while point < len(obj['data']) and in_ball(obj['data'][point]['x'], obj['data'][point]['y']):
        point += 1
    if point == len(obj['data']):
        return (0, 0)
    
    second_intersect = point # второе пересечение - вылет из шарика из-за инерции
    while point < len(obj['data']) and not in_ball(obj['data'][point]['x'], obj['data'][point]['y']):
        max_len = max(max_len, distance(obj['data'][point]['x'], obj['data'][point]['y'], x0, y0))
        point += 1
    if point == len(obj['data']):
        return (max_len, obj['data'][second_intersect]['time']-obj['data'][first_intersect]['time'])
    
    third_intersect = point # третье пересечение - возвражение обратно в шарик (корректировка)
    return (max_len, obj['data'][third_intersect]['time']-obj['data'][second_intersect]['time'])

def right_or_left(obj):
    if obj['data'][0]['x'] > obj['ballX']:
        return 1
    else:
        return -1
    
#----- Functions for dynamic experiments

def max_deviation_d(obj): # +, если прошел по верху, -, если прошел по низу
    maximum = 0
    minimum = 0
    
    x1, y1 = (obj['data'][0]['x'], obj['data'][0]['y'])
    x2, y2 = (lambda n: obj['ballData'][n]['x'], lambda n: obj['ballData'][n]['y'])
    
    A = lambda n: (y2(n)-y1)/(x1*y2(n)-x2(n)*y1)
    B = lambda n: (x1-x2(n))/(x1*y2(n)-x2(n)*y1)
    H = lambda v, n: (A(n)*v[0]+B(n)*v[1]-1)/( (A(n)**2 + B(n)**2)**(1/2) ) #x = v[0], y = v[1]
    
    for i in range(len(obj['data'])):
        h = H([obj['data'][i]['x'], obj['data'][i]['y']], i)
        maximum = max(maximum, h)
        minimum = min(minimum, h)
    
    if(abs(minimum) > maximum):
        return minimum
    else:
        return maximum

def ball_velocity(obj):
    try:
        return distance(obj['ballData'][0]['x'], obj['ballData'][0]['y'], obj['ballData'][1]['x'], obj['ballData'][1]['y'])/(obj['ballData'][1]['time']-obj['ballData'][0]['time'])
    except:
        return 0
    
def correl(obj):
    bx, by = (obj['ballData'][1]['x']-obj['ballData'][0]['x'], obj['ballData'][1]['y']-obj['ballData'][0]['y'])
    x, y = (obj['data'][1]['x']-obj['data'][0]['x'], obj['data'][1]['y']-obj['data'][0]['y'])
    return sgn(1, bx*x + by*y)

def scalar(obj, i, j):
    return (obj['data'][i]['x']-obj['data'][i-1]['x'])*(obj['data'][j]['x']-obj['data'][j-1]['x']) + (obj['data'][i]['y']-obj['data'][i-1]['y'])*(obj['data'][j]['y']-obj['data'][j-1]['y'])

def scal_cos(obj, i, j):
    try:
        return scalar(obj, i, j)/(scalar(obj, i, i)*scalar(obj, j, j))**(1/2)
    except:
        return 0

def deviation_coef(obj, i, j):
    return 1+( abs( scal_cos(obj, i, j) -1)/2 )**(1/2)

def delay_d(obj):
    barrier_value = 0.3
    for i in range(1, len(obj['data'])-1):
        if velocity(obj['data'], i+1)*deviation_coef(obj, i+1, i) > barrier_value:
            return obj['data'][i]['time']
    return 0 # пользователь не подал признаков того, что он нашел, значит в предположении, он сразу его увидел

def inertion_d(obj): # length, time
    x0, y0, R = (lambda x: obj['ballData'][x]['x'], lambda x: obj['ballData'][x]['y'], obj['ballSize'])
    max_len = 0
    in_ball = lambda x, y, p: (x-x0(p))**2+(y-y0(p))**2 < max(R, 50)**2
    
    first_intersect = -1
    for i in range(len(obj['data'])):
        if in_ball(obj['data'][i]['x'], obj['data'][i]['y'], i):
            first_intersect = i
            break
    if first_intersect < 0:
        return (0, 0)
    
    point = first_intersect # первое пересечение с шариком
    while point < len(obj['data']) and in_ball(obj['data'][point]['x'], obj['data'][point]['y'], point):
        point += 1
    if point == len(obj['data']):
        return (0, 0)
    
    second_intersect = point # второе пересечение - вылет из шарика из-за инерции
    while point < len(obj['data']) and not in_ball(obj['data'][point]['x'], obj['data'][point]['y'], point):
        max_len = max(max_len, distance(obj['data'][point]['x'], obj['data'][point]['y'], x0(point), y0(point)))
        point += 1
    if point == len(obj['data']):
        return (max_len, obj['data'][second_intersect]['time']-obj['data'][first_intersect]['time'])
    
    third_intersect = point # третье пересечение - возвражение обратно в шарик (корректировка)
    return (max_len, obj['data'][third_intersect]['time']-obj['data'][second_intersect]['time'])

def relative_coords(obj):
    return (obj['ballX']-obj['data'][0]['x'], obj['ballY']-obj['data'][0]['y'])
#----- Agreration Data -----------

# Перед тем как использовать эту функцию, убедитесь, что в папку all_tests загруженны все ваши тесты
def get_test_data(number):
    clear_tests()
    load_all_tests(number)
    raw_data = [load_test(x) for x in range(1, number_of_tests())]
    functions = [
        [lambda x: x['ballX'], lambda x: x['ballY'], lambda x: x['ballSize'], right_or_left, first_distance, all_distance, searching_delay, all_time, average_velocity, max_deviation, lambda x: inertion(x)[0], lambda x: inertion(x)[1], lambda x: x['shots']],
        [lambda x: x['data'][0]['x'], lambda x: x['data'][0]['y'], lambda x: relative_coords(x)[0], lambda x: relative_coords(x)[1], lambda x: x['ballSize'], first_distance, ball_velocity, correl, all_distance, all_time, average_velocity, lambda x: abs(max_deviation_d(x)), lambda x: sgn(1, max_deviation_d(x)), delay_d, lambda x: inertion_d(x)[0], lambda x: inertion_d(x)[1]],
        [lambda x: x['data'][0]['x'], lambda x: x['data'][0]['y'], lambda x: relative_coords(x)[0], lambda x: relative_coords(x)[1], lambda x: x['ballSize'], first_distance, ball_velocity, correl, all_distance, all_time, average_velocity, lambda x: abs(max_deviation_d(x)), lambda x: sgn(1, max_deviation_d(x)), delay_d, lambda x: inertion_d(x)[0], lambda x: inertion_d(x)[1]]
    ]
    cols = [
        ['BallX', 'BallY', 'BallSize', 'RorL', 'FirstDist', 'AllDist', 'Delay', 'AllTime', 'AverVelocity', 'MaxDev', 'InerLen', 'InerTime', 'Shots'],
        ['Px', 'Py', 'RBallX', 'RBallY', 'BallSize', 'FirstDist', 'BallVelocity', 'Correlation', 'AllDist', 'AllTime', 'AverVelocity', 'AbsMaxDev', 'UpOrDown', 'Delay', 'InerLen', 'InerTime'],
        ['Px', 'Py', 'RBallX', 'RBallY', 'BallSize', 'FirstDist', 'BallVelocity', 'Correlation', 'AllDist', 'AllTime', 'AverVelocity', 'AbsMaxDev', 'UpOrDown', 'Delay', 'InerLen', 'InerTime']
    ]
    return pd.DataFrame.from_records([[f(x) for f in functions[number-1]] for x in raw_data], columns=cols[number-1])

In [4]:
data1 = get_test_data(1)
data1 = data1[data1.AllTime < 5000]

from sklearn import linear_model
pparams = ['AllDist', 'Delay', 'AllTime', 'AverVelocity', 'MaxDev', 'InerLen', 'InerTime', 'Shots']

regs = {}
for p in pparams:
    regs[p] = linear_model.LinearRegression()
    regs[p].fit(data1[['BallSize', 'FirstDist']], data1[p])

def predict(size, dist):
    for p in regs.keys():
        print(p, ": ", regs[p].predict([size, dist])[0])
        
def print_regs():
    for p in regs.keys():
        print("{0:<12} = {1:<8.5} + {2:<8.3} * Size + {3:<9.3} * Distantion".format(p, regs[p].intercept_, regs[p].coef_[0], regs[p].coef_[1]))

print('Static model:')
print_regs()


Static model:
Shots        = 1.5448   + -0.00152 * Size + -0.000156 * Distantion
InerLen      = 0.54318  + 0.134    * Size + 0.02      * Distantion
AllDist      = 72.029   + -0.271   * Size + 1.09      * Distantion
MaxDev       = 21.608   + 0.32     * Size + 0.0448    * Distantion
Delay        = 362.43   + -0.39    * Size + -0.102    * Distantion
AllTime      = 973.78   + -3.03    * Size + 0.38      * Distantion
InerTime     = 35.905   + -0.0581  * Size + 0.0244    * Distantion
AverVelocity = 0.036215 + 0.0016   * Size + 0.001     * Distantion

In [5]:
from sklearn import svm

data2 = get_test_data(2)
data2corr = data2[data2.Correlation > 0]
data2ncorr = data2[data2.Correlation < 0]
data2iner = data2[data2.InerLen > 0]

inerPercentage = len(data2iner)/len(data2)

# --- FOR CORRELATE ----
pparamscorr = ['AllDist', 'Delay', 'AllTime', 'AverVelocity', 'AbsMaxDev']

regscorr = {}
for p in pparamscorr:
    regscorr[p] = linear_model.LinearRegression()
    regscorr[p].fit(data2corr[['BallSize', 'FirstDist', 'BallVelocity']], data2corr[p])
    
def print_regs_corr():
    for p in regscorr.keys():
        print("{0:<12} = {1:<9.5} + {2:<9.3} * Size + {3:<9.3} * Distantion + {4:<9.3} * Velocity".format(p, regscorr[p].intercept_, regscorr[p].coef_[0], regscorr[p].coef_[1], regscorr[p].coef_[2]))

devPos = svm.SVC(kernel='linear')
devPos.fit(data2corr[['RBallX', 'RBallY']], data2corr['UpOrDown'])

def print_dev_corr():
    xx, yy = 0, 0
    for i in range(len(devPos.dual_coef_[0])):
        xx+=devPos.dual_coef_[0][i]*devPos.support_vectors_[i][0]
        yy+=devPos.dual_coef_[0][i]*devPos.support_vectors_[i][1]
    print("DevPos       = sign( {0:<9.5} * RBallX + {1:<9.5} * RBallY + {2:<9.5} )".format(xx, yy, devPos.intercept_[0]))

# --- FOR NOT CORRELATE ----
pparamsncorr = ['AllDist', 'Delay', 'AllTime', 'AverVelocity', 'AbsMaxDev']

regsncorr = {}
for p in pparamsncorr:
    regsncorr[p] = linear_model.LinearRegression()
    regsncorr[p].fit(data2ncorr[['BallSize', 'FirstDist', 'BallVelocity']], data2ncorr[p])
    
def print_regs_ncorr():
    for p in regsncorr.keys():
        print("{0:<12} = {1:<9.5} + {2:<9.3} * Size + {3:<9.3} * Distantion + {4:<9.3} * Velocity".format(p, regsncorr[p].intercept_, regsncorr[p].coef_[0], regsncorr[p].coef_[1], regsncorr[p].coef_[2]))

devPos = svm.SVC(kernel='linear')
devPos.fit(data2ncorr[['RBallX', 'RBallY']], data2ncorr['UpOrDown'])

def print_dev_ncorr():
    xx, yy = 0, 0
    for i in range(len(devPos.dual_coef_[0])):
        xx+=devPos.dual_coef_[0][i]*devPos.support_vectors_[i][0]
        yy+=devPos.dual_coef_[0][i]*devPos.support_vectors_[i][1]
    print("DevPos       = sign( {0:<9.5} * RBallX + {1:<9.5} * RBallY + {2:<9.5} )".format(xx, yy, devPos.intercept_[0]))

pparamsiner = ['InerTime', 'InerLen']

regsiner = {}
for p in pparamsiner:
    regsiner[p] = linear_model.LinearRegression()
    regsiner[p].fit(data2iner[['BallSize', 'FirstDist', 'BallVelocity']], data2iner[p])
    
print("Dynamic model without green balls")
print("For correlate: ")    
print_regs_corr()
# print_dev_corr()

print("")
print("For not correlate: ")
print_regs_ncorr()
# print_dev_ncorr()
    
print("")
print("Inertion percentage: ", inerPercentage)
for p in regsiner.keys():
        print("{0:<12} = {1:<9.5} + {2:<9.3} * Size + {3:<9.3} * Distantion + {4:<9.3} * Velocity".format(p, regsiner[p].intercept_, regsiner[p].coef_[0], regsiner[p].coef_[1], regsiner[p].coef_[2]))


Dynamic model without green balls
For correlate: 
AbsMaxDev    = 89.506    + 0.188     * Size + 0.0527    * Distantion + -42.1     * Velocity
AllDist      = 399.93    + -0.981    * Size + 1.01      * Distantion + -1.94e+02 * Velocity
AverVelocity = 0.12273   + 0.00187   * Size + 0.000653  * Distantion + 0.233     * Velocity
Delay        = 557.15    + -0.281    * Size + 0.6       * Distantion + -9.7e+02  * Velocity
AllTime      = 1709.6    + -4.89     * Size + 0.952     * Distantion + -1.18e+03 * Velocity

For not correlate: 
AbsMaxDev    = 122.96    + 0.0173    * Size + 0.024     * Distantion + -23.9     * Velocity
AllDist      = 360.8     + -1.24     * Size + 0.936     * Distantion + 3.04e+02  * Velocity
AverVelocity = 0.27987   + 0.00178   * Size + 0.000778  * Distantion + -0.242    * Velocity
Delay        = 219.67    + -0.579    * Size + 0.323     * Distantion + 7.05e+02  * Velocity
AllTime      = 1248.7    + -4.75     * Size + 0.468     * Distantion + 1.07e+03  * Velocity

Inertion percentage:  0.2963562753036437
InerLen      = 52.619    + 1.26      * Size + 0.015     * Distantion + -28.5     * Velocity
InerTime     = 263.72    + -0.231    * Size + 0.0629    * Distantion + -1.12e+02 * Velocity

In [6]:
data2[:10]


Out[6]:
Px Py RBallX RBallY BallSize FirstDist BallVelocity Correlation AllDist AllTime AverVelocity AbsMaxDev UpOrDown Delay InerLen InerTime
0 636 352 -601 -144 178 618.010518 0.000000 1.0 895.353845 1086 0.824451 276.666142 1.0 249 0.000000 0
1 391 148 -194 196 134 275.775271 0.485913 1.0 606.474158 3385 0.179165 278.887503 1.0 2787 0.000000 0
2 841 361 136 -60 173 148.647233 0.326357 1.0 173.905121 702 0.247728 72.130501 1.0 403 0.000000 0
3 1034 586 -172 -83 116 190.979056 0.284553 -1.0 109.319035 1529 0.071497 13.018922 -1.0 1345 0.000000 0
4 162 648 968 -369 37 1035.946427 0.000000 1.0 1169.335063 2295 0.509514 91.107593 -1.0 270 92.617493 246
5 497 402 51 254 18 259.069489 0.105263 1.0 375.496310 2048 0.183348 27.945508 -1.0 348 90.426766 288
6 154 727 106 -590 69 599.446411 0.235702 1.0 720.101978 3123 0.230580 34.267741 1.0 2110 0.000000 0
7 574 505 -301 -332 99 448.135024 0.181818 -1.0 364.162399 839 0.434043 46.256977 1.0 393 0.000000 0
8 685 307 27 -125 81 127.882759 0.282843 -1.0 318.401298 1272 0.250315 98.978497 -1.0 259 138.452158 823
9 572 343 660 20 133 660.302961 0.235702 1.0 1165.458136 4339 0.268601 351.571392 -1.0 2729 0.000000 0

In [7]:
data3 = get_test_data(3)
data3corr = data3[data3.Correlation > 0]
data3ncorr = data3[data3.Correlation < 0]
data3iner = data3[data3.InerLen > 0]

inerPercentage3 = len(data3iner)/len(data3)

data3[:10]

# --- FOR CORRELATE ----
pparamscorr3 = ['AllDist', 'Delay', 'AllTime', 'AverVelocity', 'AbsMaxDev']

regscorr3 = {}
for p in pparamscorr3:
    regscorr3[p] = linear_model.LinearRegression()
    regscorr3[p].fit(data3corr[['BallSize', 'FirstDist', 'BallVelocity']], data3corr[p])
    
def print_regs_corr3():
    for p in regscorr3.keys():
        print("{0:<12} = {1:<9.5} + {2:<9.3} * Size + {3:<9.3} * Distantion + {4:<9.3} * Velocity".format(p, regscorr3[p].intercept_, regscorr3[p].coef_[0], regscorr3[p].coef_[1], regscorr3[p].coef_[2]))

# --- FOR NOT CORRELATE ----
pparamsncorr3 = ['AllDist', 'Delay', 'AllTime', 'AverVelocity', 'AbsMaxDev']

regsncorr3 = {}
for p in pparamsncorr3:
    regsncorr3[p] = linear_model.LinearRegression()
    regsncorr3[p].fit(data3ncorr[['BallSize', 'FirstDist', 'BallVelocity']], data3ncorr[p])
    
def print_regs_ncorr3():
    for p in regsncorr3.keys():
        print("{0:<12} = {1:<9.5} + {2:<9.3} * Size + {3:<9.3} * Distantion + {4:<9.3} * Velocity".format(p, regsncorr3[p].intercept_, regsncorr3[p].coef_[0], regsncorr3[p].coef_[1], regsncorr3[p].coef_[2]))

pparamsiner3 = ['InerTime', 'InerLen']

regsiner3 = {}
for p in pparamsiner3:
    regsiner3[p] = linear_model.LinearRegression()
    regsiner3[p].fit(data3iner[['BallSize', 'FirstDist', 'BallVelocity']], data3iner[p])
    
print("Dynamic model with green balls")
print("For correlate: ")
print_regs_corr3()
# print_dev_corr()

print("")
print("For not correlate: ")
print_regs_ncorr3()
# print_dev_ncorr()
    
print("")
print("Inertion percentage: ", inerPercentage3)
for p in regsiner3.keys():
        print("{0:<12} = {1:<9.5} + {2:<9.3} * Size + {3:<9.3} * Distantion + {4:<9.3} * Velocity".format(p, regsiner3[p].intercept_, regsiner3[p].coef_[0], regsiner3[p].coef_[1], regsiner3[p].coef_[2]))


Dynamic model with green balls
For correlate: 
AbsMaxDev    = 108.23    + 0.155     * Size + 0.0229    * Distantion + -31.4     * Velocity
AllDist      = 466.14    + -0.984    * Size + 0.915     * Distantion + -1.05e+02 * Velocity
AverVelocity = 0.20849   + 0.00181   * Size + 0.000618  * Distantion + 0.132     * Velocity
Delay        = 419.98    + -0.207    * Size + 0.458     * Distantion + -4.33e+02 * Velocity
AllTime      = 1626.9    + -4.52     * Size + 0.734     * Distantion + -5.63e+02 * Velocity

For not correlate: 
AbsMaxDev    = 130.97    + 0.0451    * Size + 0.0091    * Distantion + -22.4     * Velocity
AllDist      = 430.62    + -1.05     * Size + 0.87      * Distantion + 1.58e+02  * Velocity
AverVelocity = 0.2681    + 0.0016    * Size + 0.000711  * Distantion + 0.059     * Velocity
Delay        = 498.0     + -0.634    * Size + 0.282     * Distantion + -2.88e+02 * Velocity
AllTime      = 1528.9    + -4.61     * Size + 0.471     * Distantion + 22.9      * Velocity

Inertion percentage:  0.32327166504381694
InerLen      = 50.275    + 1.28      * Size + 0.0122    * Distantion + -17.1     * Velocity
InerTime     = 245.77    + -0.0135   * Size + 0.0216    * Distantion + -18.2     * Velocity

In [20]:
data3.plot.scatter(x='FirstDist', y='AllDist')
data3.plot.scatter(x='FirstDist', y='AverVelocity')
data3.plot.scatter(x='BallSize', y='Delay')
data3.plot.scatter(x='FirstDist', y='Delay')
data3.plot.scatter(x='BallSize', y='AllTime')
data3.plot.scatter(x='FirstDist', y='AllTime')


Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f87766cf240>

In [15]:
cond = data3.UpOrDown > 0
dup = data3.RBallY > 0
ddown = data3.RBallY < 0
dright = data3.RBallX > 0
dleft = data3.RBallX < 0

upright = data3[dup][dright]
upleft = data3[dup][dleft]
downright = data3[ddown][dright]
downleft = data3[ddown][dleft]


/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:7: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:8: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:9: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:10: UserWarning: Boolean Series key will be reindexed to match DataFrame index.

In [16]:
subset_a = upright[cond].dropna()
subset_b = upright[~cond].dropna()
plt.scatter(subset_a.RBallX, subset_a.RBallY, s=60, c='b', label='Up')
plt.scatter(subset_b.RBallX, subset_b.RBallY, s=60, c='r', label='Down') 
plt.legend()


/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app
Out[16]:
<matplotlib.legend.Legend at 0x7f8777a36828>

In [17]:
subset_a = upleft[cond].dropna()
subset_b = upleft[~cond].dropna()
plt.scatter(subset_a.RBallX, subset_a.RBallY, s=60, c='b', label='Up')
plt.scatter(subset_b.RBallX, subset_b.RBallY, s=60, c='r', label='Down') 
plt.legend()


/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app
Out[17]:
<matplotlib.legend.Legend at 0x7f87779a2e80>

In [18]:
subset_a = downleft[cond].dropna()
subset_b = downleft[~cond].dropna()
plt.scatter(subset_a.RBallX, subset_a.RBallY, s=60, c='b', label='Up')
plt.scatter(subset_b.RBallX, subset_b.RBallY, s=60, c='r', label='Down') 
plt.legend()


/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app
Out[18]:
<matplotlib.legend.Legend at 0x7f8777988a20>

In [19]:
subset_a = downright[cond].dropna()
subset_b = downright[~cond].dropna()
plt.scatter(subset_a.RBallX, subset_a.RBallY, s=60, c='b', label='Up')
plt.scatter(subset_b.RBallX, subset_b.RBallY, s=60, c='r', label='Down') 
plt.legend()


/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  if __name__ == '__main__':
/home/vagrant/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  from ipykernel import kernelapp as app
Out[19]:
<matplotlib.legend.Legend at 0x7f87778f4cc0>

In [ ]:


In [14]:
for i in pparams:
    print(i, " : ", data1['RorL'].corr(data1[i], method='pearson'))
    
rorl = linear_model.LinearRegression()

p = "RorL"

rorl.fit(data1[['BallSize', 'FirstDist']], data1[p])

print("{0:<12} = {1:<8.5} + {2:<8.3} * Size + {3:<9.3} * Distantion".format(p, rorl.intercept_, rorl.coef_[0], rorl.coef_[1]))
print("So, we can see, that correlation beetween this parameters too small and we can ignore thay")


AllDist  :  -0.112081128814
Delay  :  0.0100508459643
AllTime  :  -0.086333771138
AverVelocity  :  -0.104623984595
MaxDev  :  -0.134595951175
InerLen  :  -0.123790948141
InerTime  :  -0.106976738295
Shots  :  0.0432156371795
RorL         = 0.066416 + 0.000454 * Size + 3.16e-05  * Distantion
So, we can see, that correlation beetween this parameters too small and we can ignore thay

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: