In [1]:
% cat 144bbs2.txt | python egrep.py "[0-9]" | python line_count.py


1861

In [2]:
% cat the_bible.txt | python most_common_words.py 10


46404	the
36173	and
25421	of
9209	to
8419	in
7961	that
6670	shall
6595	he
6398	his
5945	unto

In [3]:
from collections import Counter 

def get_domain(email_address):
    """Return the lower-cased text that follows the last "@" in an address.

    If the address contains no "@", the whole lower-cased string is returned.
    """
    lowered = email_address.lower()
    # rpartition splits on the LAST "@"; its final element is the tail
    # (or the entire string when no "@" is present).
    _, _, domain = lowered.rpartition("@")
    return domain

# Count how many addresses in the file belong to each domain.
# Lines that contain no "@" are ignored.
with open("email_address.txt", "r") as f:
    domain_counts = Counter(
        map(get_domain, (entry.strip() for entry in f if "@" in entry))
    )

domain_counts


Out[3]:
Counter({'aims.ac.za': 2,
         'aip.de': 1,
         'campus.ru.ac.za': 2,
         'dut.ac.za': 3,
         'ehu.es': 1,
         'fis.ucm.es': 1,
         'gmail.com': 28,
         'iburst.co.za': 1,
         'icra.it': 1,
         'icranet.org': 1,
         'ieec.uab.es': 1,
         'kasi.re.kr': 1,
         'myuct.ac.za': 1,
         'ru.ac.za': 4,
         'stu.ukzn.ac.za': 1,
         'sun.ac.za': 1,
         'uclouvain.be': 2,
         'uct.ac.za': 2,
         'uj.ac.za': 1,
         'ukzn.ac.za': 3,
         'unizulu.ac.za': 2,
         'uofk.edu': 1,
         'wits.ac.za': 3,
         'yahoo.co.uk': 1,
         'yahoo.com': 1})

In [4]:
import csv

# Parse the tab-delimited stock file into [date, symbol, closing_price] rows.
result = []
# newline="" lets the csv module do its own newline handling, as the csv
# documentation requires for file objects passed to csv.reader.
with open("tab_delimited_stock_prices.txt", "r", newline="") as f:
    reader = csv.reader(f, delimiter="\t")
    for row in reader:
        # csv.reader yields an empty list for a blank line; skip it instead
        # of failing with IndexError on row[0].
        if not row:
            continue
        date = row[0]
        symbol = row[1]
        closing_price = float(row[2])  # third column is parsed as a float
        result.append([date, symbol, closing_price])

# Show only the first few parsed rows.
result[:5]


Out[4]:
[['6/20/2014', 'AAPL', 90.91],
 ['6/20/2014', 'MSFT', 41.68],
 ['6/20/2014', 'FB', 64.5],
 ['6/19/2014', 'AAPL', 91.86],
 ['6/19/2014', 'MSFT', 41.51]]