notebook.community

Edit and run



In [1]:

    
# Pipe 144bbs2.txt through egrep.py with the pattern "[0-9]" and then through line_count.py.
# NOTE(review): presumably this counts the lines that contain a digit; confirm against those scripts.
% cat 144bbs2.txt | python egrep.py "[0-9]" | python line_count.py



In [2]:

    
# Feed the_bible.txt to most_common_words.py with the argument 10; the recorded output lists ten words with their counts.
% cat the_bible.txt | python most_common_words.py 10









    



46404	the
36173	and
25421	of
9209	to
8419	in
7961	that
6670	shall
6595	he
6398	his
5945	unto



In [3]:

    
from collections import Counter 

def get_domain(email_address):
    """Return the lowercased text that follows the last "@" in email_address.

    When the string contains no "@", the whole lowercased string is returned.
    """
    _local_part, _at_sign, domain = email_address.lower().rpartition("@")
    return domain

# Tally how many addresses in the file belong to each domain.
# Lines that contain no "@" are ignored.
domain_counts = Counter()
with open("email_address.txt", "r") as f:
    for line in f:
        if "@" not in line:
            continue
        domain_counts[get_domain(line.strip())] += 1

# Display the per-domain totals as the cell's result.
domain_counts









    Out[3]:





Counter({'aims.ac.za': 2,
         'aip.de': 1,
         'campus.ru.ac.za': 2,
         'dut.ac.za': 3,
         'ehu.es': 1,
         'fis.ucm.es': 1,
         'gmail.com': 28,
         'iburst.co.za': 1,
         'icra.it': 1,
         'icranet.org': 1,
         'ieec.uab.es': 1,
         'kasi.re.kr': 1,
         'myuct.ac.za': 1,
         'ru.ac.za': 4,
         'stu.ukzn.ac.za': 1,
         'sun.ac.za': 1,
         'uclouvain.be': 2,
         'uct.ac.za': 2,
         'uj.ac.za': 1,
         'ukzn.ac.za': 3,
         'unizulu.ac.za': 2,
         'uofk.edu': 1,
         'wits.ac.za': 3,
         'yahoo.co.uk': 1,
         'yahoo.com': 1})



In [4]:

    
import csv

# Parse the tab-delimited stock price file into [date, symbol, closing_price] rows.
result = []
# newline="" leaves line-ending handling to the csv module, as its documentation recommends.
with open("tab_delimited_stock_prices.txt", "r", newline="") as f:
    reader = csv.reader(f, delimiter="\t")
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; indexing it would raise IndexError.
            continue
        date = row[0]
        symbol = row[1]
        closing_price = float(row[2])  # the third column is converted to a float
        result.append([date, symbol, closing_price])

# Preview the first five parsed rows as the cell's result.
result[:5]









    Out[4]:





[['6/20/2014', 'AAPL', 90.91],
 ['6/20/2014', 'MSFT', 41.68],
 ['6/20/2014', 'FB', 64.5],
 ['6/19/2014', 'AAPL', 91.86],
 ['6/19/2014', 'MSFT', 41.51]]