In [ ]:
###a: quote is the list that question a asks for

# Build `quote` from quotes.txt: the file is read as pairs of lines, and each
# odd-numbered line is joined to the even-numbered line that follows it with
# a '-' separator. A trailing unpaired line is ignored.
quote = []

# `with` guarantees the file handle is closed once reading finishes (or
# fails); a bare open() would keep it open for the life of the kernel.
with open('quotes.txt') as fhand:
    pending_line = None
    for line_number, line in enumerate(fhand, 1):
        line = line.rstrip()
        if line_number % 2 != 0:
            # Odd line: hold it until its partner line arrives.
            pending_line = line
        else:
            quote.append(pending_line + '-' + line)

print(quote[0])  # test quote[0] result

In [ ]:
###b: split every quote into words
import re

def quote2word(a):
    """Split a quote into lowercase words.

    Args:
        a: the quote text (a string).

    Returns:
        A list with every run of word characters found in ``a``, lowercased,
        in order of appearance (duplicates are kept). An input without any
        word characters gives an empty list.
    """
    # re.split() emits an empty string when the text starts or ends with a
    # non-word character (e.g. a trailing '.'); drop those so '' is never
    # counted as a word. The raw string avoids the invalid '\W' escape.
    return [token.lower() for token in re.split(r'\W+', a) if token]
    
    
print(quote2word(quote[0]))  # test quote[0] result

In [ ]:
###c: function postinglist counts each word in a quote and returns a dictionary with the word as key and its count as value.
from collections import Counter
def postinglist(a):
    """Count how often each word occurs in one quote.

    Args:
        a: the quote text; it is tokenised with quote2word().

    Returns:
        A plain dict mapping each word of ``a`` to its number of occurrences.
    """
    return dict(Counter(quote2word(a)))

print(postinglist(quote[0]))  # test quote[0] result

In [ ]:
###d: Repostlist is the reverse posting-list dictionary
# Reverse posting list: word -> {quote text: occurrences of the word in it}.
# Filled in place by reverse_postinglist().
Repostlist = {}
# Not referenced by any other code visible in this notebook.
valuelist = {}

def reverse_postinglist(a):
    """Record one quote in the global reverse posting list.

    For every word of ``a`` (as counted by postinglist), the count is stored
    under ``Repostlist[word][a]``; the inner dict is created the first time
    a word is seen.

    Args:
        a: the quote text. It is also used as the key of the inner dicts.

    Returns:
        None. ``Repostlist`` is updated in place.
    """
    for term, count in postinglist(a).items():
        # setdefault returns the existing per-word dict or installs a new one.
        Repostlist.setdefault(term, {})[a] = count
    
# Index every quote. The loop variable must not be named `list`: at module
# level that would rebind the built-in `list` type to a string for every
# cell that runs afterwards.
for quote_text in quote:
    reverse_postinglist(quote_text)


print(Repostlist)

In [ ]:
###e: 
  
def TF(w,a):
    """Term frequency of word ``w`` in quote ``a``.

    The count of ``w`` is normalised by the count of the most frequent word
    in the same quote.

    Args:
        w: the word to look up. Words in the posting list are lowercased by
            quote2word, so ``w`` should be lowercase as well.
        a: the quote text.

    Returns:
        A float: occurrences of ``w`` in ``a`` divided by the largest word
        count in ``a``. 0.0 when ``w`` does not occur in ``a`` or when ``a``
        contains no words at all.
    """
    pl = postinglist(a)
    if not pl:
        # No words in the quote: max() of an empty sequence would raise.
        return 0.0
    # float() forces true division. Under Python 2, int / int floors, which
    # turned every ratio below 1 into 0.
    return float(pl.get(w, 0)) / max(pl.values())

import math
def IDF(w, total_quotes=895):
    """Inverse document frequency of ``w`` over the indexed quotes.

    Args:
        w: a word that is a key of the global ``Repostlist``.
        total_quotes: size of the quote collection. Defaults to 895, the
            constant this function has always used -- presumably the number
            of quotes in quotes.txt; TODO confirm against len(quote).

    Returns:
        The natural logarithm of ``total_quotes`` divided by the number of
        quotes recorded for ``w`` in ``Repostlist``, as a float.

    Raises:
        KeyError: if ``w`` is not in ``Repostlist``.
    """
    quotes_with_word = len(Repostlist[w])
    # float() forces true division; Python 2's int / int would floor the
    # ratio before the logarithm is taken.
    return math.log(float(total_quotes) / quotes_with_word)
    
def TF_IDF(w,a):
    """TF-IDF weight of word ``w`` in quote ``a``.

    Args:
        w: the word to score.
        a: the quote text.

    Returns:
        TF(w, a) multiplied by IDF(w).
    """
    term_frequency = TF(w, a)
    inverse_document_frequency = IDF(w)
    return term_frequency * inverse_document_frequency
    
    
print(TF_IDF('we', quote[0]))

In [ ]:
###f:
def Quote_search_single(w):
    """Score every indexed quote that contains the word ``w``.

    Args:
        w: the word to search for, as stored in ``Repostlist``.

    Returns:
        A dict mapping each quote text listed under ``w`` in ``Repostlist``
        to TF_IDF(w, quote). An empty dict when ``w`` is not in the index.
    """
    # .get(): a word missing from the index matches no quote, which is an
    # empty result rather than a KeyError.
    matching_quotes = Repostlist.get(w, {})
    return {quote_text: TF_IDF(w, quote_text) for quote_text in matching_quotes}

print(Quote_search_single('we'))

In [8]:
###g

def Quote_search_multiple(list):
    """Score quotes against several search words.

    Args:
        list: an iterable of words. (The parameter name shadows the built-in
            ``list`` inside this function; it is kept so existing keyword
            callers continue to work.)

    Returns:
        A dict mapping each quote that contains at least one of the words to
        the product of its Quote_search_single() scores over the words it
        contains. A word given more than once contributes its score once
        per occurrence.
    """
    combined = dict()
    for w in list:
        # .items() matches reverse_postinglist and works on both Python 2
        # and Python 3; dict.iteritems() exists on Python 2 only.
        for k, v in Quote_search_single(w).items():
            if k in combined:
                combined[k] = combined[k] * v
            else:
                combined[k] = v
    return combined

print(len(Quote_search_multiple(['we', 'he'])))


146

In [ ]: