In [7]:

    
import numpy
import scipy.stats
import pandas

def compare_averages(filename, alpha=0.05):
    """
    Run Welch's t-test on the batting averages of right- and left-handed hitters.

    The CSV file is read into a pandas data frame and must contain a
    'handedness' column ('L' for left-handed, 'R' for right-handed) and an
    'avg' column holding each player's career batting average. An example
    file is available at:
    https://www.dropbox.com/s/xcn0u2uxm8c4n6l/baseball_data.csv

    Parameters
    ----------
    filename
        Location of the CSV file; passed straight to pandas.read_csv.
    alpha : float, optional
        Threshold the p-value is compared against. Defaults to 0.05,
        i.e. a 95% confidence level.

    Returns
    -------
    tuple
        (no_difference, ttest_result). ttest_result is the object returned
        by scipy.stats.ttest_ind (t statistic first, p-value second), computed
        as right-handed versus left-handed. no_difference is False when the
        p-value is <= alpha and True otherwise.
        For example: (False, (-3.98670645, 0.0000748))

    Notes
    -----
    Rows whose 'avg' is missing are ignored. Left in place, a single missing
    value makes the statistic and p-value NaN, and the function would then
    report "no difference" regardless of the remaining data.
    """
    batters = pandas.read_csv(filename)

    # Drop players without a batting average so NaN cannot poison the test.
    batters = batters.dropna(subset=["avg"])

    right_avg = batters.loc[batters["handedness"] == "R", "avg"]
    left_avg = batters.loc[batters["handedness"] == "L", "avg"]

    # equal_var=False selects Welch's t-test (no equal-variance assumption).
    ttest = scipy.stats.ttest_ind(right_avg, left_avg, equal_var=False)

    # Written as `not (p <= alpha)` rather than `p > alpha` so that an
    # undefined (NaN) p-value, e.g. from a cohort that is too small, still
    # yields True exactly as before.
    no_difference = not (ttest[1] <= alpha)
    return (no_difference, ttest)



In [8]:

    
# Machine-specific absolute path to the local copy of the baseball CSV;
# adjust it to wherever the file lives before re-running this cell.
filename = (
    "C:/Vindico/Projects/Data/Course/Python/Udacity/"
    "Introduction to Data Science/Lesson 3/baseball_data.csv"
)

# Left as the cell's last expression so the notebook displays the returned tuple.
compare_averages(filename)









    Out[8]:





(False, (-3.9867064465971302, 7.482391590970722e-05))