In [13]:
import wikipedia
import re
import numpy as np
import pandas as pd
In [14]:
# read in list of movies
df = pd.read_csv('movie.csv', encoding = "ISO-8859-1")
In [15]:
df.head()
Out[15]:
In [ ]:
data = pd.DataFrame()
titles = []
original_titles = []
content_list = []
#for title in title_list:
# first ~1000 titles (.loc slicing includes the end label, so rows 0-1000)
for title in df.loc[0:1000, 'title']:
    # accept either "<title> (<year> film)" or the bare title as a search hit
    regex = re.compile(re.escape(title) + r' \(\d*\s*film\)|' + re.escape(title) + r'$')
    regex_match = np.vectorize(regex.match)
    try:
        search_list = wikipedia.search(title + ' film')
        movie_list = regex_match(search_list)
    except (IndexError, ValueError):
        # empty or unusable search results make np.vectorize fail; skip this title
        continue
    match_list = []
    for i in range(movie_list.shape[0]):
        if movie_list[i] is not None:
            match_list.append(movie_list[i].group(0))
    titles.extend(match_list)
    for name in match_list:
        try:
            page = wikipedia.page(name)
        except Exception:
            # disambiguation or missing page: keep the row but leave content empty
            content_list.append('')
            continue
        content = page.content
        content_list.append(content)
    original_titles.extend(np.repeat(title, len(match_list)))
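To illustrate what the regex above keeps, here is a minimal sketch using made-up search results (the candidate titles are hypothetical, not taken from movie.csv): only the bare title or a "<title> (<year> film)" variant survives the filter.
In [ ]:
# Illustrative only: hypothetical search results showing which ones the regex accepts.
title = 'Heat'
regex = re.compile(re.escape(title) + r' \(\d*\s*film\)|' + re.escape(title) + r'$')
candidates = ['Heat', 'Heat (1995 film)', 'Heat wave', 'Body Heat']
print([c for c in candidates if regex.match(c)])  # -> ['Heat', 'Heat (1995 film)']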
In [17]:
data['title'] = original_titles
data['wiki_title'] = titles
data['content'] = content_list
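The column assignments above only work if the three lists came out the same length; the loop guarantees this because every matched name contributes exactly one (possibly empty) content entry. A quick sanity check, purely illustrative, that could run before building the frame:
In [ ]:
# Sanity check (illustrative): the three lists must be equally long,
# otherwise the column assignments raise a ValueError.
assert len(original_titles) == len(titles) == len(content_list)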
In [19]:
data.head()
Out[19]:
In [20]:
#data.to_csv('reviews_wiki_0_1000_v2.csv')
In [ ]: