Remember: Pair-Programming → take care of your neighbor
In [1]:
def K_to_C(t):
    """Return the Celsius value of a temperature `t` given in Kelvin."""
    # offset between the zero points of the Kelvin and Celsius scales
    kelvin_offset = 273.15
    return t - kelvin_offset
def F_to_C(t):
    """Return the Celsius value of a temperature `t` given in Fahrenheit."""
    # distance from the freezing point of water on the Fahrenheit scale
    above_freezing = t - 32.
    # a Fahrenheit degree is 5/9 of a Celsius degree
    return 5 * above_freezing / 9.
def C_to_K(t):
    """Return the Kelvin value of a temperature `t` given in Celsius."""
    # offset between the zero points of the Celsius and Kelvin scales
    kelvin_offset = 273.15
    return t + kelvin_offset
def C_to_F(t):
    """Return the Fahrenheit value of a temperature `t` given in Celsius."""
    # a Celsius degree is 9/5 of a Fahrenheit degree
    scaled = t * 9 / 5.
    # water freezes at 32 degrees Fahrenheit
    return 32 + scaled
def tconverter(temp, output_unit):
    """Convert a temperature between Kelvin, Fahrenheit and Celsius.

    Parameters:
        temp: a string made of a number followed by a one-letter unit,
            e.g. '10C' or '-13F' or '235K'.
        output_unit: the requested unit, one of 'C', 'F', 'K'.

    Returns:
        The converted temperature as a float.

    Raises:
        TypeError: if `temp` is not a string, is empty, or does not end
            with one of the known units.
        ValueError: if `output_unit` is not a known unit, or if the part
            of `temp` before the unit cannot be parsed by float().
    """
    units = ('C', 'F', 'K')
    # check that the input temperature has the right format; the explicit
    # emptiness test keeps temp[-1] from raising IndexError on ''
    if not isinstance(temp, str) or not temp or temp[-1] not in units:
        raise TypeError('Input temperature '+str(temp)+' has the wrong format!')
    # check that requested output unit is known
    if output_unit not in units:
        raise ValueError('Output unit '+str(output_unit)+' not known!')
    # dispatch table keyed by input unit + output unit; every conversion
    # goes through Celsius, so only the four to/from-Celsius pairs are needed
    conversion = {'KC': K_to_C,
                  'FC': F_to_C,
                  'CK': C_to_K,
                  'CF': C_to_F}
    # split the string into unit (last character) and numeric value
    input_unit = temp[-1]
    temp = float(temp[:-1])
    # convert input temperature to Celsius
    if input_unit != 'C':
        temp = conversion[input_unit+'C'](temp)
    # convert temperature in Celsius to output unit
    if output_unit != 'C':
        temp = conversion['C'+output_unit](temp)
    return temp
In [2]:
# 10 degrees Celsius expressed in Kelvin (10 + 273.15)
tconverter('10C', 'K')
Out[2]:
In [3]:
# -123 degrees Fahrenheit expressed in Celsius (about -86.1)
tconverter('-123F', 'C')
Out[3]:
In [4]:
# 148 degrees Fahrenheit expressed in Kelvin, converted via Celsius (about 337.6)
tconverter('148F', 'K')
Out[4]:
In [5]:
# the input temperature must be a string: passing an int raises TypeError
tconverter(10, 'F')
In [6]:
# the string does not end with a unit letter: raises TypeError
tconverter('123', 'C')
In [11]:
# 'L' is not one of 'C', 'F', 'K': raises ValueError
tconverter('12C', 'L')
{'house': 'boat'}
]
In [13]:
# open the input text for reading; the file object is iterated line by line
# by the word-counting loop below and is never explicitly closed
text = open("text.txt", "r")
In [14]:
# show the first 20 lines of the file (this is a shell command, not Python)
!head -n 20 text.txt
In [15]:
def standardize_word(word):
    """Standardize word.

    A standard word is lower cased and only contains alphabetic
    characters.

    Parameters:
        word: the string to standardize.

    Returns:
        A tuple of two strings (std_word, illegal_chars): the lower-cased
        word with every non-alphabetic character removed, and the removed
        characters in their order of appearance (repeats included).
    """
    # make word lower-case
    word = word.lower()
    # alphabetic characters kept in the word / non-alphabetic ones removed
    kept = []
    illegal = []
    # sort the characters in a single pass; this avoids the Python 2 only
    # str.translate(None, chars) call, which fails for unicode strings
    for char in word:
        if char.isalpha():
            kept.append(char)
        else:
            illegal.append(char)
    return ''.join(kept), ''.join(illegal)
In [16]:
# map from standardized word to its number of occurrences in the text
wfreq = {}
# every illegal (i.e. non alphabetic) character met while scanning the text
illegal = set()
# running total of words seen, repeats included
word_count = 0
for line in text:
    # whitespace-separated tokens; at this point they may still contain
    # illegal characters and have mixed case
    words = line.split()
    for word in words:
        # standardized form of the token plus the characters stripped from it
        stdword, illegal_chars = standardize_word(word)
        illegal.update(illegal_chars)
        word_count += 1
        # start from 0 for a word seen for the first time, then add one
        # NOTE: a token made only of illegal characters standardizes to ''
        # and is counted under the empty-string key
        wfreq[stdword] = wfreq.get(stdword, 0) + 1
In [17]:
# word_count was incremented once per word, so repeated words are included
print "Total number of words in the text:", word_count
In [18]:
# each key of wfreq is one distinct standardized word
print "Number of different words:", len(wfreq)
In [19]:
# illegal is a set, so the characters are listed in arbitrary order
print "Illegal characters in text:", list(illegal)
In [20]:
# we now want to see which words occur most often:
# sort the (word, count) pairs of the dictionary by count, highest first
# (sorting a dictionary by value this way is a common Python idiom)
import operator
wfreq_sorted = sorted(wfreq.items(), key=operator.itemgetter(1), reverse=True)
In [21]:
# print the 50 most frequent words as (word, count) pairs
print wfreq_sorted[:50]
In [22]:
def load_deception_map(filename, on_error="fail"):
    """Load a deception map from file.

    Expected format is two words per line; empty lines are skipped.
    Both words are standardized with standardize_word() and the first
    is mapped to the second.

    Parameters:
        filename: path of the deception map file.
        on_error: what to do when a word contains illegal characters.
            "fail" raises a ValueError; any other value (documented:
            "ignore") silently drops the illegal characters.

    Returns:
        A dict mapping each standardized first word to the standardized
        second word of its line. No checking is done to ensure that a
        word is not already in the map, i.e. the map may be inconsistent
        or redundant or non-invertible; a repeated key keeps the value
        of its last line.

    Raises:
        ValueError: if a non-empty line does not hold exactly two words,
            or if on_error is "fail" and illegal characters are found.
            The reported line number is 1-based.
    """
    # initialize the deception map
    dmap = {}
    # the with-block closes the file also when an error is raised below
    with open(filename, 'r') as fh:
        # number the lines from 1 so that error messages point at the
        # line a text editor would show
        for count, line in enumerate(fh, 1):
            # get the words on the current line
            words = line.split()
            if not words:
                # this is an empty line, we can skip to the next line
                continue
            if len(words) != 2:
                # we are expecting exactly two words per line
                raise ValueError('Too many/few words on line '+str(count))
            # standardize words
            key, illegal1 = standardize_word(words[0])
            value, illegal2 = standardize_word(words[1])
            illegal = illegal1 + illegal2
            if on_error == 'fail' and illegal:
                # we are asked to fail if there are illegal characters
                # in the current line
                raise ValueError('Illegal chars on line '+str(count)+':'+illegal)
            # add the word to the deception map
            dmap[key] = value
    return dmap
In [24]:
# load deception map from file; with the default on_error="fail" this
# raises a ValueError if a word in the file contains illegal characters
deception = load_deception_map('deception.dict')
In [25]:
# show the content of the loaded deception map
deception
Out[25]:
In [26]:
def deceive_text(filename, deception_map):
    """Return a version of text in filename deceived by deception_map.

    Parameters:
        filename: path of the text file to read.
        deception_map: dict mapping standardized words to replacements.

    Returns:
        A single string. Every word whose standardized form is a key of
        deception_map is lower-cased and has that standardized form
        replaced by the mapped word; all other words are kept as-is.
        Words of a line are joined with single spaces and lines with
        newline characters, so the original spacing is not preserved.
    """
    # list of output lines
    output = []
    # the with-block closes the input file also when an error is raised
    with open(filename, 'r') as text:
        for line in text:
            # list of output words for the current line
            newline = []
            for word in line.split():
                # only the standardized word is needed here, the illegal
                # characters stay in place inside the original word
                stdword = standardize_word(word)[0]
                if stdword in deception_map:
                    # this word needs to be replaced, but we want to
                    # maintain the illegal chars (case is lost in translation)
                    # NOTE: if illegal chars sit in the middle of the word,
                    # stdword is not a substring of it and nothing is replaced
                    newword = word.lower().replace(stdword, deception_map[stdword])
                    newline.append(newword)
                else:
                    # this word does not need to be replaced
                    newline.append(word)
            # an output line consists of all its words joined with a white space
            output.append(' '.join(newline))
    # the output text consists of all output lines joined with newline characters
    return '\n'.join(output)
In [27]:
# get the deceived version of our text as one string, using the map loaded above
newalice = deceive_text('text.txt', deception)
In [28]:
# show the first 20 lines of the deceived text
print '\n'.join(newalice.split('\n')[:20])
In [29]:
# write out the modified text; the with-block closes the file even if
# write() raises an error
with open('newtext.txt', 'w') as newtext:
    newtext.write(newalice)
In [ ]: