In [7]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from patsy import dmatrices

In [8]:
pickle_name = 'the-numbers/consolidated.pickle'
df_movie_data = pd.read_pickle(pickle_name)
df_movie_data.head()


Out[8]:
Release Date Release Year Title Budget Dom Box Dom Box Infl Int Box Total Box
0 Dec 31, 1970 1970 The Ballad of Tam Lin $0 $0 $0 $0 $0
1 Dec 31, 1971 1971 Sie tötete in Ekstase $0 $0 $0 $0 $0
2 Dec 31, 1972 1972 Whoever Slew Auntie Roo? $0 $0 $0 $0 $0
3 Dec 26, 1973 1973 The Exorcist $12,000,000 $204,868,002 $320,298,550 $197,867,132 $402,735,134
4 Oct 18, 1974 1974 The Texas Chainsaw Massacre $140,000 $26,572,439 $119,789,120 $0 $26,572,439

In [11]:
# Set y and random variable X
y, X = dmatrices('Total Box ~ Budget + Release Year', data=df_movie_data, return_type='dataframe')


  File "<unknown>", line 1
    Total Box
            ^
SyntaxError: invalid syntax

In [ ]: