In [1]:
from flask import Flask, request, render_template, jsonify
import json
import requests
import socket
import time
from datetime import datetime
import numpy as np 
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from MoviePosters import MoviePosters
import urllib
# Load the movie names stored under 'arr_0' in the feature archive.
# The stored array has dtype=object, which NumPy >= 1.16.3 refuses to load
# unless allow_pickle=True is passed. Unpickling can execute arbitrary code,
# so only do this with a trusted data file.
moviefiles = np.load('../../data/feature_movie_name.npz', allow_pickle=True)
movie_list = moviefiles['arr_0']


def movie_poster(title):
    """Look up a poster URL for a movie title.

    Args:
        title (str): the movie title search string.

    Returns:
        Whatever ``MoviePosters.get_poster_url`` returns for the matched
        movie, or None when the title lookup found no IMDB id.
    """
    req = MoviePosters()
    # The lookup returns None when nothing matched; without an id there is
    # nothing to request posters for, so stop instead of failing later.
    if req.imdb_id_from_title(title) is None:
        return None
    return req.get_poster_url()

def all_posters(save_path, movie_list = movie_list):
    """Collect a poster URL for every movie name and save them with ``np.save``.

    Args:
        save_path (str): destination handed to ``np.save``.
        movie_list (iterable of str): movie names; the entries shown in this
            notebook look like ``"Title (Year)"``.

    Returns:
        None. The URLs are written to ``save_path`` as a NumPy array.
    """
    posters_url = []
    for name in movie_list:
        # Cut at the LAST '(' only. Cutting at the first one turned names
        # that begin with a parenthesis, e.g. '(500) Days of Summer (2009)',
        # into an empty search string.
        title = name.rsplit('(', 1)[0]
        posters_url.append(movie_poster(title))
    np.save(save_path, np.array(posters_url))

In [3]:
# Scratch run of the collection loop at module level (nothing is saved).
# The loop must start at column 0: indenting it under the plain assignment
# above is an IndentationError.
posters_url = []
for name in movie_list:
    title = name.split('(')[0]
    poster_url = movie_poster(title)
    posters_url.append(poster_url)
posters_url = np.array(posters_url)
# np.save(save_path, posters_url)


---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
<ipython-input-3-d8e2ba2d589c> in <module>()
      2 for name in movie_list:
      3     title = name.split('(')[0]
----> 4     poster_url = movie_poster(title)
      5     posters_url.append(poster_url)
      6 posters_url = np.array(posters_url)

<ipython-input-1-0ee48ea86dd3> in movie_poster(title)
     16 def movie_poster(title):
     17     req = MoviePosters()
---> 18     req.imdb_id_from_title(title)
     19     poster_url = req.get_poster_url()
     20     return poster_url

/Users/zoesh/Desktop/gSchool/MovieForceApp/code/web_app/MoviePosters.pyc in imdb_id_from_title(self, title)
     27         url = pattern.format(movie_title=urllib.quote(title))
     28         r = requests.get(url)
---> 29         res = r.json()
     30         # sections in descending order or preference
     31         for section in ['popular','exact','substring']:

/Users/zoesh/anaconda/lib/python2.7/site-packages/requests/models.pyc in json(self, **kwargs)
    803                     # used.
    804                     pass
--> 805         return complexjson.loads(self.text, **kwargs)
    806 
    807     @property

/Users/zoesh/anaconda/lib/python2.7/site-packages/simplejson/__init__.pyc in loads(s, encoding, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, use_decimal, **kw)
    514             parse_constant is None and object_pairs_hook is None
    515             and not use_decimal and not kw):
--> 516         return _default_decoder.decode(s)
    517     if cls is None:
    518         cls = JSONDecoder

/Users/zoesh/anaconda/lib/python2.7/site-packages/simplejson/decoder.pyc in decode(self, s, _w, _PY3)
    368         if _PY3 and isinstance(s, binary_type):
    369             s = s.decode(self.encoding)
--> 370         obj, end = self.raw_decode(s)
    371         end = _w(s, end).end()
    372         if end != len(s):

/Users/zoesh/anaconda/lib/python2.7/site-packages/simplejson/decoder.pyc in raw_decode(self, s, idx, _w, _PY3)
    398             elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
    399                 idx += 3
--> 400         return self.scan_once(s, idx=_w(s, idx).end())

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [4]:
# Show the text before the first '(' of the second movie name.
movie_list[1].split('(')[0]


Out[4]:
"'A' gai wak "

In [12]:
# Search string for the manual lookups below: text before the first '(' of
# the first movie name.
title = movie_list[0].split('(')[0]

In [6]:
import requests
import json
import urllib

class MoviePosters(object):
    """Resolve a movie title to an IMDB id and build a poster image URL.

    Args:
        key (str): API key sent as ``api_key`` on the images request.
        base_url (str): prefix of the returned image URL.
        max_size (str): size segment placed between ``base_url`` and the
            poster's file path.
    """
    # NOTE(review): the default ``key`` is a hardcoded credential. It is kept
    # so existing callers keep working, but it should come from configuration.
    def __init__(self, key = '97435aa0b3279f548f2eb1591765c978',\
                    base_url =  "http://image.tmdb.org/t/p/",\
                    max_size = 'original'):
        self.key = key
        self.base_url = base_url
        self.max_size = max_size
        self.movie_id = None
        # Filled in by imdb_id_from_title. Initialised here so that
        # get_poster_url never hits a missing attribute.
        self.imdbid = None

    def imdb_id_from_title(self,title):
        """ return IMDB id for search string

            Args::
                title (str): the movie title search string

            Returns: 
                str. IMDB id, e.g., 'tt0095016' 
                None. If no match was found or the response was not JSON

        """
        pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
        url = pattern.format(movie_title=urllib.quote(title))
        # Forget the id of any earlier lookup so a miss cannot reuse it.
        self.imdbid = None
        r = requests.get(url)
        try:
            res = r.json()
        except ValueError:
            # The body was not JSON ("Expecting value: line 1 column 1");
            # report it as "no match" instead of aborting the caller's loop.
            return None
        # sections in descending order of preference
        for section in ['popular','exact','substring']:
            key = 'title_' + section
            # Skip sections that are present but empty.
            if key in res and res[key]:
                self.imdbid = res[key][0]['id']
                return self.imdbid
        return None

    def get_poster_url(self):
        """ return the URL of the first poster listed for ``self.imdbid``

            Returns:
                str. base_url + max_size + file path of the first poster
                None. If no id is set, the response was not JSON, or it
                      lists no posters

        """
        if self.imdbid is None:
            # No successful lookup yet, so there is nothing to request.
            return None
        IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
        r = requests.get(IMG_PATTERN.format(key=self.key,imdbid=self.imdbid))
        try:
            api_response = r.json()
        except ValueError:
            return None
        # A response may carry no 'posters' entry (seen as KeyError: 'posters')
        # or an empty list; both mean there is no poster to return.
        posters = api_response.get('posters') if isinstance(api_response, dict) else None
        if not posters:
            return None
        rel_path = posters[0]['file_path']
        return "{0}{1}{2}".format(self.base_url, self.max_size, rel_path)

In [13]:
def imdb_id_from_title(title):
    """ return IMDB id for search string

        Args::
            title (str): the movie title search string

        Returns: 
            str. IMDB id, e.g., 'tt0095016' 
            None. If no match was found or the response was not JSON

    """
    pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
    url = pattern.format(movie_title=urllib.quote(title))
    r = requests.get(url)
    try:
        res = r.json()
    except ValueError:
        # A non-JSON body is treated as "no match" rather than a crash.
        return None
    # sections in descending order of preference
    for section in ['popular','exact','substring']:
        key = 'title_' + section
        # Skip sections that are present but empty.
        if key in res and res[key]:
            return res[key][0]['id']
    return None

# Try the standalone lookup on the current `title`.
imdb_id_from_title(title)


Out[13]:
u'tt2614684'

In [14]:
# Run the whole title -> poster URL path on the current `title`.
movie_poster(title)


Out[14]:
'http://image.tmdb.org/t/p/original/wWTP5eAC8IRusDFDNVC7aSb3VPe.jpg'

In [25]:
# Preview the first ten names after taking the segment that precedes the
# last '(' and dropping any ')' left inside it.
for name in movie_list[:10]:
    print(name.split('(')[-2].replace(')', ''))


'71 
'A' gai wak 
'Breaker' Morant 
'Crocodile' Dundee II 
500 Days of Summer 
TRaumschiff Surprise - Periode 1 
*batteries not included 
...E tu vivrai nel terrore! L'aldilà 
...and justice for all. 
1 - Nenokkadine 

In [26]:
import requests
import json
import urllib

class MoviePosters(object):
    """Resolve a movie title to an IMDB id and build a poster image URL.

    Args:
        key (str): API key sent as ``api_key`` on the images request.
        base_url (str): prefix of the returned image URL.
        max_size (str): size segment placed between ``base_url`` and the
            poster's file path.
    """
    # NOTE(review): the default ``key`` is a hardcoded credential. It is kept
    # so existing callers keep working, but it should come from configuration.
    def __init__(self, key = '97435aa0b3279f548f2eb1591765c978',\
                    base_url =  "http://image.tmdb.org/t/p/",\
                    max_size = 'original'):
        self.key = key
        self.base_url = base_url
        self.max_size = max_size
        self.movie_id = None
        # Filled in by imdb_id_from_title. Initialised here so that
        # get_poster_url never hits a missing attribute.
        self.imdbid = None

    def imdb_id_from_title(self,title):
        """ return IMDB id for search string

            Args::
                title (str): the movie title search string

            Returns: 
                str. IMDB id, e.g., 'tt0095016' 
                None. If no match was found or the response was not JSON

        """
        pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
        url = pattern.format(movie_title=urllib.quote(title))
        # Forget the id of any earlier lookup so a miss cannot reuse it.
        self.imdbid = None
        r = requests.get(url)
        try:
            res = r.json()
        except ValueError:
            # The body was not JSON ("Expecting value: line 1 column 1");
            # report it as "no match" instead of aborting the caller's loop.
            return None
        # sections in descending order of preference
        for section in ['popular','exact','substring']:
            key = 'title_' + section
            # Skip sections that are present but empty.
            if key in res and res[key]:
                self.imdbid = res[key][0]['id']
                return self.imdbid
        return None

    def get_poster_url(self):
        """ return the URL of the first poster listed for ``self.imdbid``

            Returns:
                str. base_url + max_size + file path of the first poster
                None. If no id is set, the response was not JSON, or it
                      lists no posters

        """
        if self.imdbid is None:
            # No successful lookup yet, so there is nothing to request.
            return None
        IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
        r = requests.get(IMG_PATTERN.format(key=self.key,imdbid=self.imdbid))
        try:
            api_response = r.json()
        except ValueError:
            return None
        # A response may carry no 'posters' entry (seen as KeyError: 'posters')
        # or an empty list; both mean there is no poster to return.
        posters = api_response.get('posters') if isinstance(api_response, dict) else None
        if not posters:
            return None
        rel_path = posters[0]['file_path']
        return "{0}{1}{2}".format(self.base_url, self.max_size, rel_path)

In [68]:
from flask import Flask, request, render_template, jsonify
import json
import requests
import socket
import time
from datetime import datetime
import numpy as np 
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from MoviePosters import MoviePosters
import urllib
import string

# Load the movie names stored under 'arr_0' in the feature archive.
# The stored array has dtype=object, which NumPy >= 1.16.3 refuses to load
# unless allow_pickle=True is passed. Unpickling can execute arbitrary code,
# so only do this with a trusted data file.
moviefiles = np.load('../../data/feature_movie_name.npz', allow_pickle=True)
movie_list = moviefiles['arr_0']
# Punctuation characters removed from titles before they are searched.
exclude = set(string.punctuation)

def movie_poster(title):
    """Return the poster URL for ``title``, or 0 when no IMDB id is found.

    The id returned by the lookup (possibly None) is printed as a trace.
    """
    lookup = MoviePosters()
    imdb_id = lookup.imdb_id_from_title(title)
    print(imdb_id)
    if imdb_id is None:
        return 0
    return lookup.get_poster_url()

def all_posters(save_path, movie_list = movie_list):
    """Collect poster URLs for a slice of the movie names and save them.

    Args:
        save_path (str): destination handed to ``np.save``.
        movie_list (sequence of str): movie names; the entries shown in this
            notebook look like ``"Title (Year)"``.

    Returns:
        None. The collected values are written to ``save_path``.
    """
    posters_url = []
    # NOTE(review): only entries 40..49 are processed. This looks like a
    # debugging limit; confirm before relying on the saved file.
    for i, name in enumerate(movie_list[40:50], 1):
        # Cut at the LAST '(' only. Indexing split('(')[-2] raised IndexError
        # for names without '(' and dropped text before an inner parenthesis.
        title = name.rsplit('(', 1)[0]
        # Punctuation (including any leftover parentheses) is stripped
        # before the title is used as a search string.
        title = ''.join(ch for ch in title if ch not in exclude)
        posters_url.append(movie_poster(title))
        if i % 100 == 0: print("Url #%i" % i)
    np.save(save_path, np.array(posters_url))

# When run as a script, collect the poster URLs and save them under the
# project's data directory.
if __name__ == '__main__':
    all_posters(save_path = '../../data/movie_posters_url',movie_list = movie_list)


tt0322259
tt1272878
tt1001482
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-68-bc095e918abe> in <module>()
     40 
     41 if __name__ == '__main__':
---> 42     all_posters(save_path = '../../data/movie_posters_url',movie_list = movie_list)

<ipython-input-68-bc095e918abe> in all_posters(save_path, movie_list)
     33         title = name.split('(')[-2].replace(r")","")
     34         title = ''.join(ch for ch in title if ch not in exclude)
---> 35         poster_url = movie_poster(title)
     36         posters_url.append(poster_url)
     37         if i % 100 == 0: print "Url #%i" %(i)

<ipython-input-68-bc095e918abe> in movie_poster(title)
     23     if url == None:
     24         return 0
---> 25     poster_url = req.get_poster_url()
     26     return poster_url
     27 

/Users/zoesh/Desktop/gSchool/MovieForceApp/code/web_app/MoviePosters.pyc in get_poster_url(self)
     41         api_response = r.json()
     42         if api_response['status_code'] != 34:
---> 43             posters = api_response['posters']
     44         # poster_urls = []
     45         # for poster in posters:

KeyError: 'posters'

In [33]:
def movie_poster(title):
    """Look up a poster URL for a movie title.

    Args:
        title (str): the movie title search string.

    Returns:
        Whatever ``MoviePosters.get_poster_url`` returns for the matched
        movie, or None when the title lookup found no IMDB id.
    """
    req = MoviePosters()
    # The lookup returns None when nothing matched; without an id there is
    # nothing to request posters for, so stop instead of failing later.
    if req.imdb_id_from_title(title) is None:
        return None
    return req.get_poster_url()

In [35]:
# Inspect the collected URLs. NOTE(review): inside all_posters this name is a
# local, so it only exists here if a module-level cell assigned it first.
posters_url


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-35-0f5e2ea09b16> in <module>()
----> 1 posters_url

NameError: name 'posters_url' is not defined

In [36]:
# Show the first ten raw movie names.
movie_list[:10]


Out[36]:
array(["'71 (2014)", "'A' gai wak (1983)", "'Breaker' Morant (1980)",
       "'Crocodile' Dundee II (1988)", '(500) Days of Summer (2009)',
       '(T)Raumschiff Surprise - Periode 1 (2004)',
       '*batteries not included (1987)',
       "...E tu vivrai nel terrore! L'aldil\xe0 (1981)",
       '...and justice for all. (1979)', '1 - Nenokkadine (2014)'], dtype=object)

In [57]:
# Preview the search strings built for entries 43..49: the segment before
# the last '(' with ')' and all other punctuation removed.
for name in movie_list[43:50]:
    title = ''.join(
        ch for ch in name.split('(')[-2].replace(')', '') if ch not in exclude
    )
    print(title)


20000 Leagues Under the Sea 
200 Cigarettes 
2001 Maniacs 
2001 A Space Odyssey 
2010 
2012 
2046 

In [56]:
# API key for the manual images request made in a later cell.
# NOTE(review): hardcoded credential; prefer reading it from the environment.
key = '97435aa0b3279f548f2eb1591765c978'

In [59]:
# Repeat the images request by hand for tt1001482, the last id printed before
# the KeyError: 'posters' in the all_posters run, so the raw response can be
# inspected in `api_response`.
imdbid = 'tt1001482'
IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
r = requests.get(IMG_PATTERN.format(key=key,imdbid=imdbid))
api_response = r.json()

In [ ]:
# NOTE(review): `sqlalchemy`, `df_forecastio` and `sql_helper` are not defined
# anywhere in this notebook, and the table written ('mf_user') differs from
# the one keyed below ('forecast'). Confirm this cell belongs here.
engine = sqlalchemy.create_engine("postgres://postgres@/movieforce")
conn = engine.connect()
try:
    # save to Postgres SQL database
    df_forecastio.to_sql('mf_user', engine, if_exists='append', index=True)
finally:
    # Release the connection and the engine's pool even if the write fails.
    conn.close()
    engine.dispose()

# set primary database key
sql_helper.db_setkey('forecast', 'time')