Goal One



In [9]:

    
#Read file in as raw text
filename = "tmdb_5000_movies.csv"
file = open(filename, "r")
raw_text = ""
for line in file:
    raw_text += line
file.close()
raw_text[:1000]

""" 
could have used a with open that will close the file for you.
'prove it was there': file.count('id')
"""









    Out[9]:





'budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count\n237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""name"": ""Fantasy""}, {""id"": 878, ""name"": ""Science Fiction""}]",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"": 2964, ""name"": ""future""}, {""id"": 3386, ""name"": ""space war""}, {""id"": 3388, ""name"": ""space colony""}, {""id"": 3679, ""name"": ""society""}, {""id"": 3801, ""name"": ""space travel""}, {""id"": 9685, ""name"": ""futuristic""}, {""id"": 9840, ""name"": ""romance""}, {""id"": 9882, ""name"": ""space""}, {""id"": 9951, ""name"": ""alien""}, {""id"": 10148, ""name"": ""tribe""}, {""id"": 10158, ""name"": ""alien planet""}, {""id"": 10987, ""name"": ""cgi""}, {""id"": 11399, ""name"": ""marine""}, {""id"":'



In [10]:

    
#Then as a list of lines
filename = "tmdb_5000_movies.csv"
file = open(filename, "r")
lines = [line for line in file]
file.close()

""" faster: with open (filname, mode='r') as f: 
        [print(line) for line in f]
"""



In [1]:

    
#Then as a Data Frame
import pandas as pd
df = pd.read_csv("tmdb_5000_movies.csv")
df.head()









    Out[1]:







  
    
      
      budget
      genres
      homepage
      id
      keywords
      original_language
      original_title
      overview
      popularity
      production_companies
      production_countries
      release_date
      revenue
      runtime
      spoken_languages
      status
      tagline
      title
      vote_average
      vote_count
    
  
  
    
      0
      237000000
      [{"id": 28, "name": "Action"}, {"id": 12, "nam...
      http://www.avatarmovie.com/
      19995
      [{"id": 1463, "name": "culture clash"}, {"id":...
      en
      Avatar
      In the 22nd century, a paraplegic Marine is di...
      150.437577
      [{"name": "Ingenious Film Partners", "id": 289...
      [{"iso_3166_1": "US", "name": "United States o...
      2009-12-10
      2787965087
      162.0
      [{"iso_639_1": "en", "name": "English"}, {"iso...
      Released
      Enter the World of Pandora.
      Avatar
      7.2
      11800
    
    
      1
      300000000
      [{"id": 12, "name": "Adventure"}, {"id": 14, "...
      http://disney.go.com/disneypictures/pirates/
      285
      [{"id": 270, "name": "ocean"}, {"id": 726, "na...
      en
      Pirates of the Caribbean: At World's End
      Captain Barbossa, long believed to be dead, ha...
      139.082615
      [{"name": "Walt Disney Pictures", "id": 2}, {"...
      [{"iso_3166_1": "US", "name": "United States o...
      2007-05-19
      961000000
      169.0
      [{"iso_639_1": "en", "name": "English"}]
      Released
      At the end of the world, the adventure begins.
      Pirates of the Caribbean: At World's End
      6.9
      4500
    
    
      2
      245000000
      [{"id": 28, "name": "Action"}, {"id": 12, "nam...
      http://www.sonypictures.com/movies/spectre/
      206647
      [{"id": 470, "name": "spy"}, {"id": 818, "name...
      en
      Spectre
      A cryptic message from Bond’s past sends him o...
      107.376788
      [{"name": "Columbia Pictures", "id": 5}, {"nam...
      [{"iso_3166_1": "GB", "name": "United Kingdom"...
      2015-10-26
      880674609
      148.0
      [{"iso_639_1": "fr", "name": "Fran\u00e7ais"},...
      Released
      A Plan No One Escapes
      Spectre
      6.3
      4466
    
    
      3
      250000000
      [{"id": 28, "name": "Action"}, {"id": 80, "nam...
      http://www.thedarkknightrises.com/
      49026
      [{"id": 849, "name": "dc comics"}, {"id": 853,...
      en
      The Dark Knight Rises
      Following the death of District Attorney Harve...
      112.312950
      [{"name": "Legendary Pictures", "id": 923}, {"...
      [{"iso_3166_1": "US", "name": "United States o...
      2012-07-16
      1084939099
      165.0
      [{"iso_639_1": "en", "name": "English"}]
      Released
      The Legend Ends
      The Dark Knight Rises
      7.6
      9106
    
    
      4
      260000000
      [{"id": 28, "name": "Action"}, {"id": 12, "nam...
      http://movies.disney.com/john-carter
      49529
      [{"id": 818, "name": "based on novel"}, {"id":...
      en
      John Carter
      John Carter is a war-weary, former military ca...
      43.926995
      [{"name": "Walt Disney Pictures", "id": 2}]
      [{"iso_3166_1": "US", "name": "United States o...
      2012-03-07
      284139100
      132.0
      [{"iso_639_1": "en", "name": "English"}]
      Released
      Lost in our world, found in another.
      John Carter
      6.1
      2124

Goal Two



In [15]:

    
# references
# https://www.kaggle.com/fabiendaniel/film-recommendation-engine
# http://www.jeannicholashould.com/tidy-data-in-python.html



In [64]:

    
import json
import pandas as pd
import numpy as np

df = pd.read_csv("tmdb_5000_movies.csv")

#convert to json
json_columns = ['genres', 'keywords', 'production_countries',
                    'production_companies', 'spoken_languages']
for column in json_columns:
    df[column] = df[column].apply(json.loads)


def get_unique_inner_json(feature):
    tmp = []
    for i, row in df[feature].iteritems():
        for x in range(0,len(df[feature].iloc[i])):
            tmp.append(df[feature].iloc[i][x]['name'])

    unique_values = set(tmp)
    return unique_values



In [35]:

    
def widen_data(df, feature):
    unique_json = get_unique_inner_json(feature)
   
    tmp = []
    #rearrange genres
    for i, row in df.iterrows():
        for x in range(0,len(row[feature])):
            for val in unique_json:
                if row[feature][x]['name'] == val:
                    row[val] = 1
                    
        tmp.append(row)
    
    new_df = pd.DataFrame(tmp)
    new_df[list(unique_json)] = new_df[list(unique_json)].fillna(value=0)
    return new_df



In [36]:

    
genres_arranged_df = widen_data(df, "genres")

#Convert float values to ints
genres_arranged_df[list(get_unique_inner_json("genres"))] = genres_arranged_df[list(get_unique_inner_json("genres"))].astype(int)



In [48]:

    
genres_arranged_df.head()









    Out[48]:







  
    
      
      Action
      Adventure
      Animation
      Comedy
      Crime
      Documentary
      Drama
      Family
      Fantasy
      Foreign
      ...
      production_countries
      release_date
      revenue
      runtime
      spoken_languages
      status
      tagline
      title
      vote_average
      vote_count
    
  
  
    
      0
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      ...
      [{'iso_3166_1': 'US', 'name': 'United States o...
      2009-12-10
      2787965087
      162.0
      [{'iso_639_1': 'en', 'name': 'English'}, {'iso...
      Released
      Enter the World of Pandora.
      Avatar
      7.2
      11800
    
    
      1
      1
      1
      0
      0
      0
      0
      0
      0
      1
      0
      ...
      [{'iso_3166_1': 'US', 'name': 'United States o...
      2007-05-19
      961000000
      169.0
      [{'iso_639_1': 'en', 'name': 'English'}]
      Released
      At the end of the world, the adventure begins.
      Pirates of the Caribbean: At World's End
      6.9
      4500
    
    
      2
      1
      1
      0
      0
      1
      0
      0
      0
      0
      0
      ...
      [{'iso_3166_1': 'GB', 'name': 'United Kingdom'...
      2015-10-26
      880674609
      148.0
      [{'iso_639_1': 'fr', 'name': 'Français'}, {'is...
      Released
      A Plan No One Escapes
      Spectre
      6.3
      4466
    
    
      3
      1
      0
      0
      0
      1
      0
      1
      0
      0
      0
      ...
      [{'iso_3166_1': 'US', 'name': 'United States o...
      2012-07-16
      1084939099
      165.0
      [{'iso_639_1': 'en', 'name': 'English'}]
      Released
      The Legend Ends
      The Dark Knight Rises
      7.6
      9106
    
    
      4
      1
      1
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      [{'iso_3166_1': 'US', 'name': 'United States o...
      2012-03-07
      284139100
      132.0
      [{'iso_639_1': 'en', 'name': 'English'}]
      Released
      Lost in our world, found in another.
      John Carter
      6.1
      2124
    
  

5 rows × 40 columns

Goal Three



In [60]:

    
genres_long_df = pd.melt(genres_arranged_df, id_vars=df.columns, value_vars=get_unique_inner_json("genres"), var_name="genre", value_name="genre_val")



In [62]:

    
genres_long_df = genres_long_df[genres_long_df['genre_val'] == 1]



In [63]:

    
genres_long_df[genres_long_df['id'] == 19995]









    Out[63]:







  
    
      
      budget
      genres
      homepage
      id
      keywords
      original_language
      original_title
      overview
      popularity
      production_companies
      ...
      revenue
      runtime
      spoken_languages
      status
      tagline
      title
      vote_average
      vote_count
      genre
      genre_val
    
  
  
    
      0
      237000000
      [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
      http://www.avatarmovie.com/
      19995
      [{'id': 1463, 'name': 'culture clash'}, {'id':...
      en
      Avatar
      In the 22nd century, a paraplegic Marine is di...
      150.437577
      [{'name': 'Ingenious Film Partners', 'id': 289...
      ...
      2787965087
      162.0
      [{'iso_639_1': 'en', 'name': 'English'}, {'iso...
      Released
      Enter the World of Pandora.
      Avatar
      7.2
      11800
      Adventure
      1
    
    
      4803
      237000000
      [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
      http://www.avatarmovie.com/
      19995
      [{'id': 1463, 'name': 'culture clash'}, {'id':...
      en
      Avatar
      In the 22nd century, a paraplegic Marine is di...
      150.437577
      [{'name': 'Ingenious Film Partners', 'id': 289...
      ...
      2787965087
      162.0
      [{'iso_639_1': 'en', 'name': 'English'}, {'iso...
      Released
      Enter the World of Pandora.
      Avatar
      7.2
      11800
      Science Fiction
      1
    
    
      14409
      237000000
      [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
      http://www.avatarmovie.com/
      19995
      [{'id': 1463, 'name': 'culture clash'}, {'id':...
      en
      Avatar
      In the 22nd century, a paraplegic Marine is di...
      150.437577
      [{'name': 'Ingenious Film Partners', 'id': 289...
      ...
      2787965087
      162.0
      [{'iso_639_1': 'en', 'name': 'English'}, {'iso...
      Released
      Enter the World of Pandora.
      Avatar
      7.2
      11800
      Action
      1
    
    
      19212
      237000000
      [{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...
      http://www.avatarmovie.com/
      19995
      [{'id': 1463, 'name': 'culture clash'}, {'id':...
      en
      Avatar
      In the 22nd century, a paraplegic Marine is di...
      150.437577
      [{'name': 'Ingenious Film Partners', 'id': 289...
      ...
      2787965087
      162.0
      [{'iso_639_1': 'en', 'name': 'English'}, {'iso...
      Released
      Enter the World of Pandora.
      Avatar
      7.2
      11800
      Fantasy
      1
    
  

4 rows × 22 columns



In [ ]:

	budget	genres	homepage	id	keywords	original_language	original_title	overview	popularity	production_companies	production_countries	release_date	revenue	runtime	spoken_languages	status	tagline	title	vote_average	vote_count
0	237000000	[{"id": 28, "name": "Action"}, {"id": 12, "nam...	http://www.avatarmovie.com/	19995	[{"id": 1463, "name": "culture clash"}, {"id":...	en	Avatar	In the 22nd century, a paraplegic Marine is di...	150.437577	[{"name": "Ingenious Film Partners", "id": 289...	[{"iso_3166_1": "US", "name": "United States o...	2009-12-10	2787965087	162.0	[{"iso_639_1": "en", "name": "English"}, {"iso...	Released	Enter the World of Pandora.	Avatar	7.2	11800
1	300000000	[{"id": 12, "name": "Adventure"}, {"id": 14, "...	http://disney.go.com/disneypictures/pirates/	285	[{"id": 270, "name": "ocean"}, {"id": 726, "na...	en	Pirates of the Caribbean: At World's End	Captain Barbossa, long believed to be dead, ha...	139.082615	[{"name": "Walt Disney Pictures", "id": 2}, {"...	[{"iso_3166_1": "US", "name": "United States o...	2007-05-19	961000000	169.0	[{"iso_639_1": "en", "name": "English"}]	Released	At the end of the world, the adventure begins.	Pirates of the Caribbean: At World's End	6.9	4500
2	245000000	[{"id": 28, "name": "Action"}, {"id": 12, "nam...	http://www.sonypictures.com/movies/spectre/	206647	[{"id": 470, "name": "spy"}, {"id": 818, "name...	en	Spectre	A cryptic message from Bond’s past sends him o...	107.376788	[{"name": "Columbia Pictures", "id": 5}, {"nam...	[{"iso_3166_1": "GB", "name": "United Kingdom"...	2015-10-26	880674609	148.0	[{"iso_639_1": "fr", "name": "Fran\u00e7ais"},...	Released	A Plan No One Escapes	Spectre	6.3	4466
3	250000000	[{"id": 28, "name": "Action"}, {"id": 80, "nam...	http://www.thedarkknightrises.com/	49026	[{"id": 849, "name": "dc comics"}, {"id": 853,...	en	The Dark Knight Rises	Following the death of District Attorney Harve...	112.312950	[{"name": "Legendary Pictures", "id": 923}, {"...	[{"iso_3166_1": "US", "name": "United States o...	2012-07-16	1084939099	165.0	[{"iso_639_1": "en", "name": "English"}]	Released	The Legend Ends	The Dark Knight Rises	7.6	9106
4	260000000	[{"id": 28, "name": "Action"}, {"id": 12, "nam...	http://movies.disney.com/john-carter	49529	[{"id": 818, "name": "based on novel"}, {"id":...	en	John Carter	John Carter is a war-weary, former military ca...	43.926995	[{"name": "Walt Disney Pictures", "id": 2}]	[{"iso_3166_1": "US", "name": "United States o...	2012-03-07	284139100	132.0	[{"iso_639_1": "en", "name": "English"}]	Released	Lost in our world, found in another.	John Carter	6.1	2124

	Action	Adventure	Crime	Drama	Fantasy	...	production_countries	release_date	revenue	runtime	spoken_languages	status	tagline	title	vote_average	vote_count
0	1	1	0	0	1	...	[{'iso_3166_1': 'US', 'name': 'United States o...	2009-12-10	2787965087	162.0	[{'iso_639_1': 'en', 'name': 'English'}, {'iso...	Released	Enter the World of Pandora.	Avatar	7.2	11800
1	1	1	0	0	1	...	[{'iso_3166_1': 'US', 'name': 'United States o...	2007-05-19	961000000	169.0	[{'iso_639_1': 'en', 'name': 'English'}]	Released	At the end of the world, the adventure begins.	Pirates of the Caribbean: At World's End	6.9	4500
2	1	1	1	0	0	...	[{'iso_3166_1': 'GB', 'name': 'United Kingdom'...	2015-10-26	880674609	148.0	[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...	Released	A Plan No One Escapes	Spectre	6.3	4466
3	1	0	1	1	0	...	[{'iso_3166_1': 'US', 'name': 'United States o...	2012-07-16	1084939099	165.0	[{'iso_639_1': 'en', 'name': 'English'}]	Released	The Legend Ends	The Dark Knight Rises	7.6	9106
4	1	1	0	0	0	...	[{'iso_3166_1': 'US', 'name': 'United States o...	2012-03-07	284139100	132.0	[{'iso_639_1': 'en', 'name': 'English'}]	Released	Lost in our world, found in another.	John Carter	6.1	2124

	budget	genres	homepage	id	keywords	original_language	original_title	overview	popularity	production_companies	...	revenue	runtime	spoken_languages	status	tagline	title	vote_average	vote_count	genre	genre_val
0	237000000	[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...	http://www.avatarmovie.com/	19995	[{'id': 1463, 'name': 'culture clash'}, {'id':...	en	Avatar	In the 22nd century, a paraplegic Marine is di...	150.437577	[{'name': 'Ingenious Film Partners', 'id': 289...	...	2787965087	162.0	[{'iso_639_1': 'en', 'name': 'English'}, {'iso...	Released	Enter the World of Pandora.	Avatar	7.2	11800	Adventure	1
4803	237000000	[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...	http://www.avatarmovie.com/	19995	[{'id': 1463, 'name': 'culture clash'}, {'id':...	en	Avatar	In the 22nd century, a paraplegic Marine is di...	150.437577	[{'name': 'Ingenious Film Partners', 'id': 289...	...	2787965087	162.0	[{'iso_639_1': 'en', 'name': 'English'}, {'iso...	Released	Enter the World of Pandora.	Avatar	7.2	11800	Science Fiction	1
14409	237000000	[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...	http://www.avatarmovie.com/	19995	[{'id': 1463, 'name': 'culture clash'}, {'id':...	en	Avatar	In the 22nd century, a paraplegic Marine is di...	150.437577	[{'name': 'Ingenious Film Partners', 'id': 289...	...	2787965087	162.0	[{'iso_639_1': 'en', 'name': 'English'}, {'iso...	Released	Enter the World of Pandora.	Avatar	7.2	11800	Action	1
19212	237000000	[{'id': 28, 'name': 'Action'}, {'id': 12, 'nam...	http://www.avatarmovie.com/	19995	[{'id': 1463, 'name': 'culture clash'}, {'id':...	en	Avatar	In the 22nd century, a paraplegic Marine is di...	150.437577	[{'name': 'Ingenious Film Partners', 'id': 289...	...	2787965087	162.0	[{'iso_639_1': 'en', 'name': 'English'}, {'iso...	Released	Enter the World of Pandora.	Avatar	7.2	11800	Fantasy	1