Exercise 1 (Sarah)


In [51]:
import os

def go_for_a_walk(d):
    """Print the full path of every file found beneath directory ``d``.

    The tree is traversed top-down with ``os.walk``. Only files are
    printed (one joined path per line); directories themselves are not.

    d: path of the directory to start walking from.
    """
    for root, _dirs, files in os.walk(d, topdown=True):
        for name in files:
            # Single-argument, parenthesised print behaves identically
            # under Python 2 and Python 3.
            print(os.path.join(root, name))

# List every file beneath the notebook's current working directory.
go_for_a_walk(os.getcwd())


/Users/dr9/Developer/team-code/python-club/notebooks/a.txt
/Users/dr9/Developer/team-code/python-club/notebooks/anagram_sets.py
/Users/dr9/Developer/team-code/python-club/notebooks/anagram_sets.pyc
/Users/dr9/Developer/team-code/python-club/notebooks/anagrams.db.db
/Users/dr9/Developer/team-code/python-club/notebooks/b
/Users/dr9/Developer/team-code/python-club/notebooks/c06d
/Users/dr9/Developer/team-code/python-club/notebooks/patterns.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-1.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-10-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-10.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-11-solutions-final.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-11-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-11.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-12-solutions-final.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-12-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-12.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-14-solutions-Copy1.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-14-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-14.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-2-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-2.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-3-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-3.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-4-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-4.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-5-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-5.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-6-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-6.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-7-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-7.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-8-solutions.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-8.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-9.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/python-club-solutions-9.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/stopwatch.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/test.txt
/Users/dr9/Developer/team-code/python-club/notebooks/Untitled.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/Untitled2-Copy0.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/Vagrantfile
/Users/dr9/Developer/team-code/python-club/notebooks/words.txt
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/parsing-hits-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/patterns-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-1-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-10-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-10-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-11-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-11-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-11-solutions-final-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-12-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-12-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-12-solutions-final-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-14-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-14-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-14-solutions-Copy1-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-2-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-2-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-3-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-3-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-4-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-4-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-5-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-5-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-6-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-6-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-7-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-7-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-8-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-8-solutions-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-9-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/python-club-solutions-9-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/stopwatch-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.ipynb_checkpoints/Untitled-checkpoint.ipynb
/Users/dr9/Developer/team-code/python-club/notebooks/.vagrant/machines/default/virtualbox/action_provision
/Users/dr9/Developer/team-code/python-club/notebooks/.vagrant/machines/default/virtualbox/action_set_name
/Users/dr9/Developer/team-code/python-club/notebooks/.vagrant/machines/default/virtualbox/id
/Users/dr9/Developer/team-code/python-club/notebooks/.vagrant/machines/default/virtualbox/index_uuid
/Users/dr9/Developer/team-code/python-club/notebooks/.vagrant/machines/default/virtualbox/private_key
/Users/dr9/Developer/team-code/python-club/notebooks/.vagrant/machines/default/virtualbox/synced_folders

Exercise 2 (Wendy)


In [57]:
!cat exercise14-2.py


"""Exercise 14.2. Write a function called sed that takes as arguments a pattern string, a replacement string, and two filenames; it should read the first file and write the contents into the second file (creating it if necessary). If the pattern string appears anywhere in the file, it should be replaced with the replacement string.
"""

import os
import sys
import string

def main():
    """Copy a file while replacing every occurrence of a pattern string.

    Command-line arguments (``sys.argv``):
        1: pattern string to search for
        2: replacement string
        3: input file to read
        4: output file to write (created if necessary); when it is missing
           or cannot be opened, 'output.txt' is used instead

    Exits with status 1 if fewer than three arguments are given or the
    input file cannot be opened. The input is opened before the output so
    that a bad input path never truncates an existing output file.
    """
    if len(sys.argv) < 4:
        print('Usage: exercise14-2.py PATTERN REPLACEMENT INFILE [OUTFILE]')
        sys.exit(1)

    # Enter pattern string and replacement string
    pattern_string = sys.argv[1]
    replacement_string = sys.argv[2]

    # Open the file to read. Without it there is nothing to do, so stop
    # here instead of failing later on an unbound name.
    try:
        fin = open(sys.argv[3])
    except (IOError, OSError):
        print('Something went wrong with opening the first file.')
        sys.exit(1)

    # 'with' guarantees the input is closed even if a later step raises.
    with fin:
        # Open the requested output file; fall back to 'output.txt' when it
        # was not given or cannot be opened.
        fout = None
        if len(sys.argv) > 4:
            try:
                fout = open(sys.argv[4], 'w')
            except (IOError, OSError):
                fout = None
        if fout is None:
            print('Something went wrong with opening the second file it has been created')
            fout = open('output.txt', 'w')

        # Read through the input file and replace any of the pattern
        # strings found with the replacement string.
        with fout:
            for line in fin:
                fout.write(line.replace(pattern_string, replacement_string))

if __name__ == "__main__":
    main()

In [58]:
!echo 'This is a testing string that may work' > file_in
!python exercise14-2.py may does file_in file_out
!cat file_out


This is a testing string that does work

Exercise 3 (Dan)


In [43]:
import anagram_sets
import shelve

def store_anagrams(filename, word_file='words.txt'):
    """Persist the anagram dictionary built from ``word_file`` in a shelf.

    filename: path of the shelf to create or update (opened with flag 'c').
    word_file: word list handed to ``anagram_sets.all_anagrams``; defaults
        to 'words.txt'.

    Each key/value pair returned by ``all_anagrams`` is written to the
    shelf unchanged (presumably letter-signature -> list of words; confirm
    against anagram_sets).
    """
    # Build the mapping first so a failure here does not leave an open shelf.
    anagram_map = anagram_sets.all_anagrams(word_file)

    shelf = shelve.open(filename, 'c')
    try:
        # .items() works on both Python 2 and 3 (iteritems() is 2-only).
        for key, word_list in anagram_map.items():
            shelf[key] = word_list
    finally:
        # Always flush and release the underlying database file.
        shelf.close()

def read_anagrams(filename, word):
    """Return the list stored in the shelf for the signature of ``word``.

    filename: path of the shelf to read.
    word: word whose anagrams are wanted.

    Returns the stored list, or an empty list when the shelf has no entry
    for the word's signature.
    """
    # Only the anagram_sets module is imported in this notebook, so the
    # helper must be reached through it; a bare signature() raises
    # NameError on a fresh kernel.
    # NOTE(review): assumes anagram_sets exposes signature() - confirm.
    sig = anagram_sets.signature(word)

    shelf = shelve.open(filename)
    try:
        return shelf[sig]
    except KeyError:
        return []
    finally:
        # Release the database file on every exit path.
        shelf.close()

In [44]:
# Write the anagram sets to the shelf 'anagrams.db'.
store_anagrams('anagrams.db')

In [50]:
# Look up the entry stored for 'cat' in the shelf 'anagrams.db'.
read_anagrams('anagrams.db', 'cat')


Out[50]:
['act', 'cat']

Exercise 4 (Liu)


In [61]:
import os

def walk(workdir):
    """
    Collect the full path of every file found anywhere beneath 'workdir'.

    The tree is traversed top-down; each file name is joined to the
    directory that contains it. Returns the paths as a list (empty when
    no files are found).
    """
    return [
        os.path.join(parent, filename)
        for parent, _subdirs, filenames in os.walk(workdir, topdown=True)
        for filename in filenames
    ]

def make_md5sum(list_files, suffix):
    """
    Group files by the MD5 checksum of their contents.

    list_files: iterable of file paths.
    suffix: only paths ending with this string are hashed.

    Returns a dictionary mapping each hex MD5 digest to the list of
    matching paths with that digest, in the order they were given.
    Files that cannot be read are skipped.
    """
    # Local import keeps this cell self-contained; hashing in-process also
    # avoids the macOS-only 'md5' command and building a shell command from
    # a file name (which broke on spaces and shell metacharacters).
    import hashlib

    d_md5sum = {}
    for each_file in list_files:
        if not each_file.endswith(suffix):
            continue
        digest = hashlib.md5()
        try:
            with open(each_file, 'rb') as fp:
                # Read in chunks so large files are not loaded whole.
                for chunk in iter(lambda: fp.read(65536), b''):
                    digest.update(chunk)
        except (IOError, OSError):
            # Unreadable file: skip it instead of filing it under a bogus
            # key where it would be reported as a duplicate.
            continue
        d_md5sum.setdefault(digest.hexdigest(), []).append(each_file)
    return d_md5sum

def find_duplicates(dict_md5sum):
    """
    Print every checksum that has more than one file attached to it.

    dict_md5sum: dictionary mapping an md5sum string to a list of file
        paths.

    For each entry with two or more paths, a header line naming the
    checksum is printed, followed by one tab-indented line per path.
    Nothing is returned.
    """
    for each_md5sum, paths in dict_md5sum.items():
        if len(paths) > 1:
            # Single-argument, parenthesised print gives the same output
            # under Python 2 and Python 3.
            print('Dups found with md5sum "' + each_md5sum + '":')
            for each_file in paths:
                print('\t' + each_file)


Dups found with md5sum "d41d8cd98f00b204e9800998ecf8427e":
	/Users/dr9/Developer/team-code/python-club/notebooks/a.txt
	/Users/dr9/Developer/team-code/python-club/notebooks/output.txt
	/Users/dr9/Developer/team-code/python-club/notebooks/test.txt

In [66]:
!echo testing > testing.txt
!cp testing.txt testing_dup.txt
# Gather every file under the current working directory, checksum the
# '.txt' ones, and report checksums shared by more than one file.
all_files = walk(os.getcwd())
dict_md5sum = make_md5sum(all_files, '.txt')
find_duplicates(dict_md5sum)


Dups found with md5sum "eb1a3227cdc3fedbaec2fe38bf6c044a":
	/Users/dr9/Developer/team-code/python-club/notebooks/testing.txt
	/Users/dr9/Developer/team-code/python-club/notebooks/testing_dup.txt
Dups found with md5sum "d41d8cd98f00b204e9800998ecf8427e":
	/Users/dr9/Developer/team-code/python-club/notebooks/a.txt
	/Users/dr9/Developer/team-code/python-club/notebooks/output.txt
	/Users/dr9/Developer/team-code/python-club/notebooks/test.txt