Statistics with Lists!


In [ ]:
import math

from scipy import stats
import scipy

import string
def getNumbers():
    """Read integers from the user until a blank line is entered.

    This is a sentinel loop: the user is prompted repeatedly, and pressing
    <Enter> on an empty line ends the input.

    Returns:
        list: the integers entered, in the order they were typed.

    Raises:
        ValueError: if a non-empty entry cannot be parsed by ``int()``.
    """
    # A single prompt string is shared by the first read and every re-read,
    # so the two can no longer drift apart (the re-prompt used to be missing
    # its closing parenthesis).
    prompt = "Enter a number (<Enter> to quit): "

    nums = []
    xStr = raw_input(prompt)
    while xStr != "":
        nums.append(int(xStr))
        xStr = raw_input(prompt)
    return nums

In [ ]:
def mean(nums):
    """Return the arithmetic mean of ``nums`` as a float.

    ``nums`` must support both iteration and ``len()``.  An empty
    sequence raises ZeroDivisionError.
    """
    total = 0.0  # float accumulator, so the final division is never integer division
    for value in nums:
        total += value
    return total / len(nums)

In [ ]:
def stdDev(nums, xbar):
    """Return the standard deviation of ``nums`` measured about ``xbar``.

    The squared deviations from ``xbar`` are summed and divided by
    ``len(nums) - 1`` (the sample form) before the square root is taken,
    so a one-element sequence raises ZeroDivisionError.
    """
    squared_total = 0.0
    for value in nums:
        squared_total += (xbar - value) ** 2
    degrees_of_freedom = len(nums) - 1
    return math.sqrt(squared_total / degrees_of_freedom)

In [ ]:
def median(nums):
    """Return the median of ``nums`` without modifying the caller's data.

    For an odd number of values the middle value of the sorted data is
    returned unchanged.  For an even number the two middle values are
    averaged and the result is a float.

    Args:
        nums: an iterable of mutually comparable numbers.

    Raises:
        IndexError: if ``nums`` is empty.
    """
    # sorted() works on a copy; the previous in-place nums.sort() silently
    # reordered the list owned by the caller.
    ordered = sorted(nums)
    size = len(ordered)
    # Floor division keeps the index an int on Python 3 as well as Python 2.
    midPos = size // 2
    if size % 2 == 0:
        return (ordered[midPos] + ordered[midPos - 1]) / 2.0
    return ordered[midPos]

In [ ]:
def main():
    """Prompt for numbers, then print their mean, standard deviation and median.

    Nothing is returned; the summary (or a short explanation when too few
    numbers were entered) is written to standard output.
    """
    data = getNumbers()
    # For a quick non-interactive check, use instead: data = [2, 4, 6, 9, 13]

    # stdDev() divides by len(data) - 1 and mean() by len(data), so fewer
    # than two values would end in a ZeroDivisionError.
    if len(data) < 2:
        print("Need at least two numbers to compute the statistics.")
        return

    xbar = mean(data)
    std = stdDev(data, xbar)
    # Alternative left by the original author: std = scipy.stats.tstd(data)
    med = median(data)

    # A single parenthesised argument prints identically on Python 2 and 3.
    print("The mean of this set of numbers is %.2f, the std is %.2f, "
          "and the median is %.2f" % (xbar, std, med))
##    
##
##    
##
# Run the statistics demo: main() reads numbers via getNumbers() and prints the summary.
main()

Practice: Dictionaries


In [ ]:
# Practice data only: a toy name -> password table built one key at a time
# to show dictionary item assignment.
psswrd = {}
psswrd["cara"] = "singing"
psswrd["susana"] = "batman"
psswrd["michele"] = "purple"

# A single parenthesised argument prints the same on Python 2 and runs on
# Python 3, where the bare print statement is a SyntaxError.
print(psswrd)

In [ ]:
# Show the three views of the dictionary: its keys, its values, and its
# (key, value) pairs.  list() leaves the Python 2 output unchanged and makes
# Python 3 print plain lists instead of dict_keys/dict_values/dict_items.
print(list(psswrd.keys()))
print(list(psswrd.values()))
print(list(psswrd.items()))

Practice: Word Frequency (with Dictionaries!)

To be viewed after practicum


In [ ]:
import string

def getFile(filename):
    """Return the entire contents of the text file at ``filename``.

    The file is opened in read mode inside a ``with`` block, so the handle
    is closed even when reading raises.

    Args:
        filename: path of the file to read.

    Returns:
        str: everything in the file, as returned by ``read()``.
    """
    with open(filename, "r") as infile:
        return infile.read()

def wordCount(filename="c:/Python27/Methods_1/book_34.txt"):
    """Count how often each word occurs in a text file.

    The text is lower-cased, a fixed set of punctuation characters is
    replaced by spaces, and the result is split on whitespace.

    Args:
        filename: path of the file to analyse.  It defaults to the path
            this function previously hard-coded, so ``wordCount()`` with
            no argument behaves as before.

    Returns:
        list: ``(word, count)`` tuples, one per distinct word, in the
        dictionary's own iteration order (not sorted).
    """
    text = getFile(filename).lower()

    # "\x92", "\x93", "\x94" are presumably Windows-1252 curly quotes found
    # in the source text -- confirm against the file's actual encoding.
    punc_list = [".", "?", "-", "!", '"', "'", "$", ";", ":", ",", "(", ")",
                 "\x92", "\x93", "\x94"]
    # Replace punctuation with a space so it neither sticks to nor joins words.
    for char in punc_list:
        # str.replace is the method form of string.replace(), which no
        # longer exists on Python 3.
        text = text.replace(char, " ")

    counts = {}
    for w in text.split():
        # dict.get replaces has_key(), which Python 3 removed.
        counts[w] = counts.get(w, 0) + 1

    # list() guarantees a sliceable list on Python 3, where items() is a view.
    return list(counts.items())

def main():
    """Print the first 100 (word, count) pairs returned by wordCount()."""
    word_freq_list = wordCount()
    # A single parenthesised argument prints the same on Python 2 and runs on
    # Python 3, where the bare print statement is a SyntaxError.
    print(word_freq_list[:100])
    


# Run the word-frequency demo defined just above (reads the file via wordCount()).
main()

Practice: Order Word Count Lists


In [ ]:
def Order(items):
    """Return the entries of ``items`` ordered from highest to lowest frequency.

    ``items`` is an indexable sequence of entries whose element at index 1
    is the frequency, e.g. ``(word, count)`` tuples.  Each frequency is
    paired with the entry's position, the pairs are sorted ascending and
    then walked backwards, so entries with equal frequency come out in
    reverse of their original positions.  A new list is returned and
    ``items`` itself is not modified.
    """
    # (frequency, position) pairs: sorting them orders by frequency first
    # and by position second.
    ranked = sorted((items[i][1], i) for i in range(len(items)))
    # Walk the ascending ranking backwards to put the most frequent first.
    return [items[i] for _, i in reversed(ranked)]


def main():
    """Print the 100 highest-frequency (word, count) pairs from wordCount()."""
    word_freq_list = wordCount()
    final = Order(word_freq_list)
    # A single parenthesised argument prints the same on Python 2 and runs on
    # Python 3, where the bare print statement is a SyntaxError.
    print(final[:100])

# Run the ordered word-frequency demo defined just above (wordCount() then Order()).
main()