In [1]:
from flask import Flask, request, render_template, jsonify
import json
import requests
import socket
import time
from datetime import datetime
import numpy as np
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from MoviePosters import MoviePosters
import urllib
# Load the saved .npz archive and take the array stored under 'arr_0' as the
# list of movie names (entries are later split on "(", so presumably strings
# such as "Title (year)" -- TODO confirm against the data file).
moviefiles = np.load('../../data/feature_movie_name.npz')
movie_list = moviefiles['arr_0']
def movie_poster(title):
    """Return the poster URL for a movie title, or 0 when no IMDB match exists.

    Args:
        title (str): movie title search string, passed to
            MoviePosters.imdb_id_from_title.

    Returns:
        Whatever MoviePosters.get_poster_url() returns for the matched title,
        or 0 when imdb_id_from_title() returns None.
    """
    req = MoviePosters()
    # Without a match there is no id to look posters up for. Stop here rather
    # than letting get_poster_url() fail and abort a whole batch; 0 is the
    # "no poster" marker the later movie_poster definition in this notebook uses.
    if req.imdb_id_from_title(title) is None:
        return 0
    return req.get_poster_url()
def all_posters(save_path, movie_list = movie_list):
    """Collect a poster URL for every entry of movie_list and save them.

    Args:
        save_path: destination handed to np.save.
        movie_list: iterable of movie name strings; the text before the first
            "(" of each entry is used as the search title.

    Returns:
        None. The URLs are written to save_path as a numpy array.
    """
    urls = [movie_poster(entry.split('(')[0]) for entry in movie_list]
    np.save(save_path, np.array(urls))
In [3]:
# Cell-level copy of the all_posters loop: look up a poster URL for every
# entry of movie_list (title = text before the first "(") and collect the
# results in posters_url. Saving is disabled (see the commented-out np.save).
posters_url = []
for name in movie_list:
title = name.split('(')[0]
poster_url = movie_poster(title)
posters_url.append(poster_url)
posters_url = np.array(posters_url)
# np.save(save_path, posters_url)
In [4]:
# Show the text before the first "(" of the second movie_list entry.
movie_list[1].split('(')[0]
Out[4]:
In [12]:
# Sample search title: the text before the first "(" of the first entry.
title = movie_list[0].split('(')[0]
In [6]:
import requests
import json
import urllib
class MoviePosters(object):
    """Find a movie's IMDB id from its title and build a TMDB poster URL for it.

    Call imdb_id_from_title() first; get_poster_url() then uses the id that
    lookup stored on the instance.
    """

    # NOTE(review): an API key is hard-coded as the default for `key`. It is
    # kept so existing MoviePosters() callers keep working, but callers should
    # pass their own key (e.g. read from the environment) and this one should
    # be rotated.
    def __init__(self, key = '97435aa0b3279f548f2eb1591765c978',
                 base_url = "http://image.tmdb.org/t/p/",
                 max_size = 'original'):
        """Store the API key and the pieces used to build poster URLs.

        Args:
            key (str): api_key sent to api.themoviedb.org.
            base_url (str): prefix of every poster URL.
            max_size (str): size segment placed between base_url and the
                poster's file path.
        """
        self.key = key
        self.base_url = base_url
        self.max_size = max_size
        self.movie_id = None
        # Always defined, so get_poster_url() can tell "no id yet" apart from
        # a real id instead of failing on a missing attribute.
        self.imdbid = None

    def imdb_id_from_title(self,title):
        """Return the IMDB id for a search string and remember it on the instance.

        Args:
            title (str): the movie title search string

        Returns:
            str. IMDB id, e.g., 'tt0095016'
            None. If no match was found
        """
        # Forget the id of any earlier lookup so that a failed search cannot
        # leave a stale id behind for get_poster_url().
        self.imdbid = None
        pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
        url = pattern.format(movie_title=urllib.quote(title))
        r = requests.get(url)
        res = r.json()
        # sections in descending order of preference
        for section in ['popular','exact','substring']:
            key = 'title_' + section
            # A section may be present but empty; skip it instead of indexing
            # into an empty list.
            if key in res and res[key]:
                self.imdbid = res[key][0]['id']
                return self.imdbid
        return None

    def get_poster_url(self):
        """Return the URL of the first poster listed for the stored IMDB id.

        Returns:
            str. base_url + max_size + the first poster's file_path.
            None. If no id has been stored yet, or the response lists no
            posters.
        """
        if self.imdbid is None:
            return None
        IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
        r = requests.get(IMG_PATTERN.format(key=self.key,imdbid=self.imdbid))
        api_response = r.json()
        # Responses without a 'posters' list (or with an empty one) mean there
        # is nothing to build a URL from.
        posters = api_response.get('posters') if isinstance(api_response, dict) else None
        if not posters:
            return None
        rel_path = posters[0]['file_path']
        return "{0}{1}{2}".format(self.base_url, self.max_size, rel_path)
In [13]:
def imdb_id_from_title(title):
    """Return the IMDB id for a title search string.

    Args:
        title (str): the movie title search string

    Returns:
        str. IMDB id, e.g., 'tt0095016'
        None. If no match was found
    """
    pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
    url = pattern.format(movie_title=urllib.quote(title))
    res = requests.get(url).json()
    # sections in descending order of preference
    for section in ['popular','exact','substring']:
        key = 'title_' + section
        # A section may be present but empty; skip it instead of indexing
        # into an empty list.
        if key in res and res[key]:
            return res[key][0]['id']
    return None
# Try the lookup on the sample title defined in an earlier cell.
imdb_id_from_title(title)
Out[13]:
In [14]:
# Try the full title -> poster URL path on the sample title.
movie_poster(title)
Out[14]:
In [25]:
# Preview the title extraction on the first ten entries: take the
# second-to-last "("-separated piece and drop any ")" left in it.
for name in movie_list[:10]:
    # Parenthesised print behaves the same on Python 2 and 3 for one argument.
    print(name.split('(')[-2].replace(r")",""))
In [26]:
import requests
import json
import urllib
class MoviePosters(object):
    """Find a movie's IMDB id from its title and build a TMDB poster URL for it.

    Call imdb_id_from_title() first; get_poster_url() then uses the id that
    lookup stored on the instance.
    """

    # NOTE(review): an API key is hard-coded as the default for `key`. It is
    # kept so existing MoviePosters() callers keep working, but callers should
    # pass their own key (e.g. read from the environment) and this one should
    # be rotated.
    def __init__(self, key = '97435aa0b3279f548f2eb1591765c978',
                 base_url = "http://image.tmdb.org/t/p/",
                 max_size = 'original'):
        """Store the API key and the pieces used to build poster URLs.

        Args:
            key (str): api_key sent to api.themoviedb.org.
            base_url (str): prefix of every poster URL.
            max_size (str): size segment placed between base_url and the
                poster's file path.
        """
        self.key = key
        self.base_url = base_url
        self.max_size = max_size
        self.movie_id = None
        # Always defined, so get_poster_url() can tell "no id yet" apart from
        # a real id instead of failing on a missing attribute.
        self.imdbid = None

    def imdb_id_from_title(self,title):
        """Return the IMDB id for a search string and remember it on the instance.

        Args:
            title (str): the movie title search string

        Returns:
            str. IMDB id, e.g., 'tt0095016'
            None. If no match was found
        """
        # Forget the id of any earlier lookup so that a failed search cannot
        # leave a stale id behind for get_poster_url().
        self.imdbid = None
        pattern = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q={movie_title}'
        url = pattern.format(movie_title=urllib.quote(title))
        r = requests.get(url)
        res = r.json()
        # sections in descending order of preference
        for section in ['popular','exact','substring']:
            key = 'title_' + section
            # A section may be present but empty; skip it instead of indexing
            # into an empty list.
            if key in res and res[key]:
                self.imdbid = res[key][0]['id']
                return self.imdbid
        return None

    def get_poster_url(self):
        """Return the URL of the first poster listed for the stored IMDB id.

        Returns:
            str. base_url + max_size + the first poster's file_path.
            None. If no id has been stored yet, or the response lists no
            posters.
        """
        if self.imdbid is None:
            return None
        IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
        r = requests.get(IMG_PATTERN.format(key=self.key,imdbid=self.imdbid))
        api_response = r.json()
        # Responses without a 'posters' list (or with an empty one) mean there
        # is nothing to build a URL from.
        posters = api_response.get('posters') if isinstance(api_response, dict) else None
        if not posters:
            return None
        rel_path = posters[0]['file_path']
        return "{0}{1}{2}".format(self.base_url, self.max_size, rel_path)
In [68]:
from flask import Flask, request, render_template, jsonify
import json
import requests
import socket
import time
from datetime import datetime
import numpy as np
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
from MoviePosters import MoviePosters
import urllib
import string
# Load the saved .npz archive and take the array stored under 'arr_0' as the
# list of movie names (entries are later split on "(", so presumably strings
# such as "Title (year)" -- TODO confirm against the data file).
moviefiles = np.load('../../data/feature_movie_name.npz')
movie_list = moviefiles['arr_0']
# Characters stripped from titles before they are used as search strings.
exclude = set(string.punctuation)
def movie_poster(title):
    """Return the poster URL for a movie title, or 0 when no IMDB match exists.

    Args:
        title (str): movie title search string, passed to
            MoviePosters.imdb_id_from_title.

    Returns:
        Whatever MoviePosters.get_poster_url() returns for the matched title,
        or 0 when imdb_id_from_title() returns None.
    """
    req = MoviePosters()
    imdb_id = req.imdb_id_from_title(title)
    # Trace of the id that was found (None when the search had no match).
    # Parenthesised print behaves the same on Python 2 and 3 for one argument.
    print(imdb_id)
    if imdb_id is None:
        return 0
    return req.get_poster_url()
def all_posters(save_path, movie_list = movie_list, start = 40, stop = 50):
    """Look up poster URLs for a slice of movie_list and save them with np.save.

    Args:
        save_path: destination handed to np.save.
        movie_list: sequence of movie name strings.
        start, stop: bounds of the slice of movie_list that is processed. The
            defaults reproduce the previously hard-coded movie_list[40:50];
            pass start=0, stop=None to process every entry.

    Returns:
        None. The collected values (poster URL, or whatever movie_poster
        returns when it finds no match) are written to save_path as a numpy
        array.
    """
    posters_url = []
    for i, name in enumerate(movie_list[start:stop], 1):
        parts = name.split('(')
        # Use the second-to-last "("-separated piece; a name without any "("
        # has only one piece, so fall back to it instead of raising IndexError.
        raw_title = parts[-2] if len(parts) > 1 else parts[0]
        title = raw_title.replace(r")","")
        title = ''.join(ch for ch in title if ch not in exclude)
        posters_url.append(movie_poster(title))
        # Progress marker every 100 lookups.
        if i % 100 == 0:
            print("Url #%i" %(i))
    np.save(save_path, np.array(posters_url))
# Entry point: collect poster URLs for movie_list and save them under
# ../../data/movie_posters_url.
if __name__ == '__main__':
all_posters(save_path = '../../data/movie_posters_url',movie_list = movie_list)
In [33]:
def movie_poster(title):
    """Return the poster URL for a movie title, or 0 when no IMDB match exists.

    Args:
        title (str): movie title search string, passed to
            MoviePosters.imdb_id_from_title.

    Returns:
        Whatever MoviePosters.get_poster_url() returns for the matched title,
        or 0 when imdb_id_from_title() returns None.
    """
    req = MoviePosters()
    # Without a match there is no id to look posters up for. Stop here rather
    # than letting get_poster_url() fail and abort a whole batch; 0 is the
    # "no poster" marker the other movie_poster definition in this notebook uses.
    if req.imdb_id_from_title(title) is None:
        return 0
    return req.get_poster_url()
In [35]:
# Display the poster URLs collected so far.
posters_url
In [36]:
# Display the first ten raw movie_list entries.
movie_list[:10]
Out[36]:
In [57]:
# Preview the cleaned search titles for entries 43-49: take the second-to-last
# "("-separated piece, drop ")" and every character listed in `exclude`.
for name in movie_list[43:50]:
    title = name.split('(')[-2].replace(r")","")
    title = ''.join(ch for ch in title if ch not in exclude)
    # Parenthesised print behaves the same on Python 2 and 3 for one argument.
    print(title)
In [56]:
# api_key used for the api.themoviedb.org request in the following cell.
# NOTE(review): this is a credential committed in source; load it from the
# environment (or a secrets store) instead and rotate this value.
key = '97435aa0b3279f548f2eb1591765c978'
In [59]:
# Manual check of the images endpoint for one hard-coded IMDB id: request the
# image listing and keep the decoded JSON in api_response for inspection.
# Relies on `key` being defined by another cell.
imdbid = 'tt1001482'
IMG_PATTERN = 'http://api.themoviedb.org/3/movie/{imdbid}/images?api_key={key}'
r = requests.get(IMG_PATTERN.format(key=key,imdbid=imdbid))
api_response = r.json()
In [ ]:
# NOTE(review): this cell has no execution count, and `sqlalchemy`,
# `df_forecastio` and `sql_helper` are not imported or defined anywhere in the
# visible notebook -- confirm where they are meant to come from. The data is
# appended to table 'mf_user' while the key is set on 'forecast'; verify that
# this is intended.
# SQLAlchemy's dialect name is "postgresql"; the "postgres://" alias was
# removed in SQLAlchemy 1.4, so use the spelling every version accepts.
engine = sqlalchemy.create_engine("postgresql://postgres@/movieforce")
conn = engine.connect()
try:
    # save to Postgres SQL database
    df_forecastio.to_sql('mf_user', engine, if_exists='append', index=True)
finally:
    # Release the connection and the pool even when the write fails.
    conn.close()
    engine.dispose()
# set primary database key
sql_helper.db_setkey('forecast', 'time')