notebook.community

Edit and run



In [2]:

    
import pandas
food_info = pandas.read_csv('food_info.csv')
print type(food_info)
print food_info.dtypes









    



<class 'pandas.core.frame.DataFrame'>
NDB_No               int64
Shrt_Desc           object
Water_(g)          float64
Energ_Kcal           int64
Protein_(g)        float64
Lipid_Tot_(g)      float64
Ash_(g)            float64
Carbohydrt_(g)     float64
Fiber_TD_(g)       float64
Sugar_Tot_(g)      float64
Calcium_(mg)       float64
Iron_(mg)          float64
Magnesium_(mg)     float64
Phosphorus_(mg)    float64
Potassium_(mg)     float64
Sodium_(mg)        float64
Zinc_(mg)          float64
Copper_(mg)        float64
Manganese_(mg)     float64
Selenium_(mcg)     float64
Vit_C_(mg)         float64
Thiamin_(mg)       float64
Riboflavin_(mg)    float64
Niacin_(mg)        float64
Vit_B6_(mg)        float64
Vit_B12_(mcg)      float64
Vit_A_IU           float64
Vit_A_RAE          float64
Vit_E_(mg)         float64
Vit_D_mcg          float64
Vit_D_IU           float64
Vit_K_(mcg)        float64
FA_Sat_(g)         float64
FA_Mono_(g)        float64
FA_Poly_(g)        float64
Cholestrl_(mg)     float64
dtype: object



In [5]:

    
first_rows = food_info.head()
first_rows
print food_info.head(3)
print food_info.columns
print food_info.shape









    



   NDB_No                 Shrt_Desc  Water_(g)  Energ_Kcal  Protein_(g)  \
0    1001          BUTTER WITH SALT      15.87         717         0.85   
1    1002  BUTTER WHIPPED WITH SALT      15.87         717         0.85   
2    1003      BUTTER OIL ANHYDROUS       0.24         876         0.28   

   Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  Fiber_TD_(g)  Sugar_Tot_(g)  \
0          81.11     2.11            0.06           0.0           0.06   
1          81.11     2.11            0.06           0.0           0.06   
2          99.48     0.00            0.00           0.0           0.00   

        ...        Vit_A_IU  Vit_A_RAE  Vit_E_(mg)  Vit_D_mcg  Vit_D_IU  \
0       ...          2499.0      684.0        2.32        1.5      60.0   
1       ...          2499.0      684.0        2.32        1.5      60.0   
2       ...          3069.0      840.0        2.80        1.8      73.0   

   Vit_K_(mcg)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  Cholestrl_(mg)  
0          7.0      51.368       21.021        3.043           215.0  
1          7.0      50.489       23.426        3.012           219.0  
2          8.6      61.924       28.732        3.694           256.0  

[3 rows x 36 columns]
Index([u'NDB_No', u'Shrt_Desc', u'Water_(g)', u'Energ_Kcal', u'Protein_(g)',
       u'Lipid_Tot_(g)', u'Ash_(g)', u'Carbohydrt_(g)', u'Fiber_TD_(g)',
       u'Sugar_Tot_(g)', u'Calcium_(mg)', u'Iron_(mg)', u'Magnesium_(mg)',
       u'Phosphorus_(mg)', u'Potassium_(mg)', u'Sodium_(mg)', u'Zinc_(mg)',
       u'Copper_(mg)', u'Manganese_(mg)', u'Selenium_(mcg)', u'Vit_C_(mg)',
       u'Thiamin_(mg)', u'Riboflavin_(mg)', u'Niacin_(mg)', u'Vit_B6_(mg)',
       u'Vit_B12_(mcg)', u'Vit_A_IU', u'Vit_A_RAE', u'Vit_E_(mg)',
       u'Vit_D_mcg', u'Vit_D_IU', u'Vit_K_(mcg)', u'FA_Sat_(g)',
       u'FA_Mono_(g)', u'FA_Poly_(g)', u'Cholestrl_(mg)'],
      dtype='object')
(8618, 36)



In [15]:

    
#print food_info.loc[1]

#print food_info.loc[6]
#print food_info.loc[8620]
#print food_info['NDB_No']
col_name = ['NDB_No','Shrt_Desc']
print food_info[col_name]









    



      NDB_No                                          Shrt_Desc
0       1001                                   BUTTER WITH SALT
1       1002                           BUTTER WHIPPED WITH SALT
2       1003                               BUTTER OIL ANHYDROUS
3       1004                                        CHEESE BLUE
4       1005                                       CHEESE BRICK
5       1006                                        CHEESE BRIE
6       1007                                   CHEESE CAMEMBERT
7       1008                                     CHEESE CARAWAY
8       1009                                     CHEESE CHEDDAR
9       1010                                    CHEESE CHESHIRE
10      1011                                       CHEESE COLBY
11      1012                CHEESE COTTAGE CRMD LRG OR SML CURD
12      1013                        CHEESE COTTAGE CRMD W/FRUIT
13      1014   CHEESE COTTAGE NONFAT UNCRMD DRY LRG OR SML CURD
14      1015                   CHEESE COTTAGE LOWFAT 2% MILKFAT
15      1016                   CHEESE COTTAGE LOWFAT 1% MILKFAT
16      1017                                       CHEESE CREAM
17      1018                                        CHEESE EDAM
18      1019                                        CHEESE FETA
19      1020                                     CHEESE FONTINA
20      1021                                     CHEESE GJETOST
21      1022                                       CHEESE GOUDA
22      1023                                     CHEESE GRUYERE
23      1024                                   CHEESE LIMBURGER
24      1025                                    CHEESE MONTEREY
25      1026                         CHEESE MOZZARELLA WHL MILK
26      1027                CHEESE MOZZARELLA WHL MILK LO MOIST
27      1028                   CHEESE MOZZARELLA PART SKIM MILK
28      1029               CHEESE MOZZARELLA LO MOIST PART-SKIM
29      1030                                    CHEESE MUENSTER
...      ...                                                ...
8588   43544         BABYFOOD CRL RICE W/ PEARS & APPL DRY INST
8589   43546                     BABYFOOD BANANA NO TAPIOCA STR
8590   43550                     BABYFOOD BANANA APPL DSSRT STR
8591   43566       SNACKS TORTILLA CHIPS LT (BAKED W/ LESS OIL)
8592   43570  CEREALS RTE POST HONEY BUNCHES OF OATS HONEY RSTD
8593   43572                         POPCORN MICROWAVE LOFAT&NA
8594   43585                       BABYFOOD FRUIT SUPREME DSSRT
8595   43589                               CHEESE SWISS LOW FAT
8596   43595             BREAKFAST BAR CORN FLAKE CRUST W/FRUIT
8597   43597                            CHEESE MOZZARELLA LO NA
8598   43598                           MAYONNAISE DRSNG NO CHOL
8599   44005                          OIL CORN PEANUT AND OLIVE
8600   44018                   SWEETENERS TABLETOP FRUCTOSE LIQ
8601   44048                              CHEESE FOOD IMITATION
8602   44055                                CELERY FLAKES DRIED
8603   44061           PUDDINGS CHOC FLAVOR LO CAL INST DRY MIX
8604   44074                    BABYFOOD GRAPE JUC NO SUGAR CND
8605   44110                   JELLIES RED SUGAR HOME PRESERVED
8606   44158                         PIE FILLINGS BLUEBERRY CND
8607   44203               COCKTAIL MIX NON-ALCOHOLIC CONCD FRZ
8608   44258            PUDDINGS CHOC FLAVOR LO CAL REG DRY MIX
8609   44259  PUDDINGS ALL FLAVORS XCPT CHOC LO CAL REG DRY MIX
8610   44260  PUDDINGS ALL FLAVORS XCPT CHOC LO CAL INST DRY...
8611   48052                                 VITAL WHEAT GLUTEN
8612   80200                                      FROG LEGS RAW
8613   83110                                    MACKEREL SALTED
8614   90240                         SCALLOP (BAY&SEA) CKD STMD
8615   90480                                         SYRUP CANE
8616   90560                                          SNAIL RAW
8617   93600                                   TURTLE GREEN RAW

[8618 rows x 2 columns]



In [17]:

    
col_name = food_info.columns.tolist()
print col_name

gram_name = []

for c in col_name:
    if c.endswith('(g)'):
        gram_name.append(c)

gram_df = food_info[gram_name]

print gram_df.head(3)









    



['NDB_No', 'Shrt_Desc', 'Water_(g)', 'Energ_Kcal', 'Protein_(g)', 'Lipid_Tot_(g)', 'Ash_(g)', 'Carbohydrt_(g)', 'Fiber_TD_(g)', 'Sugar_Tot_(g)', 'Calcium_(mg)', 'Iron_(mg)', 'Magnesium_(mg)', 'Phosphorus_(mg)', 'Potassium_(mg)', 'Sodium_(mg)', 'Zinc_(mg)', 'Copper_(mg)', 'Manganese_(mg)', 'Selenium_(mcg)', 'Vit_C_(mg)', 'Thiamin_(mg)', 'Riboflavin_(mg)', 'Niacin_(mg)', 'Vit_B6_(mg)', 'Vit_B12_(mcg)', 'Vit_A_IU', 'Vit_A_RAE', 'Vit_E_(mg)', 'Vit_D_mcg', 'Vit_D_IU', 'Vit_K_(mcg)', 'FA_Sat_(g)', 'FA_Mono_(g)', 'FA_Poly_(g)', 'Cholestrl_(mg)']
   Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0      15.87         0.85          81.11     2.11            0.06   
1      15.87         0.85          81.11     2.11            0.06   
2       0.24         0.28          99.48     0.00            0.00   

   Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
0           0.0           0.06      51.368       21.021        3.043  
1           0.0           0.06      50.489       23.426        3.012  
2           0.0           0.00      61.924       28.732        3.694



In [21]:

    
#print food_info['Copper_(mg)']
div_1000 = food_info['Sodium_(mg)']/1000
#print div_1000
print food_info.shape
food_info['Copper_(g)'] = div_1000
print food_info.shape









    



(8618, 37)
(8618, 37)



In [30]:

    
food_info.sort_values('Water_(g)',inplace=True,ascending = False)
print food_info['Water_(g)']









    



4377    100.00
4348    100.00
4376    100.00
4378    100.00
4209    100.00
4404     99.98
4372     99.98
4379     99.97
4407     99.97
4373     99.97
4374     99.96
4213     99.95
4204     99.95
4208     99.95
4203     99.93
4205     99.90
4357     99.90
4356     99.90
4369     99.90
4347     99.90
4239     99.90
4408     99.89
4411     99.85
4252     99.80
4270     99.80
4392     99.80
4260     99.80
4409     99.79
4255     99.74
4398     99.70
         ...  
635       0.00
8455      0.00
671       0.00
790       0.00
689       0.00
8599      0.00
744       0.00
737       0.00
711       0.00
763       0.00
787       0.00
684       0.00
665       0.00
8509      0.00
638       0.00
637       0.00
686       0.00
8122      0.00
634       0.00
633       0.00
632       0.00
630       0.00
629       0.00
631       0.00
6150       NaN
6067       NaN
6113       NaN
1983       NaN
7776       NaN
6095       NaN
Name: Water_(g), dtype: float64



In [34]:

    
import numpy as np
import pandas as pd
titanic_survival = pd.read_csv('titanic_train.csv')
titanic_survival.head()









    Out[34]:






  
    
      
      PassengerId
      Survived
      Pclass
      Name
      Sex
      Age
      SibSp
      Parch
      Ticket
      Fare
      Cabin
      Embarked
    
  
  
    
      0
      1
      0
      3
      Braund, Mr. Owen Harris
      male
      22.0
      1
      0
      A/5 21171
      7.2500
      NaN
      S
    
    
      1
      2
      1
      1
      Cumings, Mrs. John Bradley (Florence Briggs Th...
      female
      38.0
      1
      0
      PC 17599
      71.2833
      C85
      C
    
    
      2
      3
      1
      3
      Heikkinen, Miss. Laina
      female
      26.0
      0
      0
      STON/O2. 3101282
      7.9250
      NaN
      S
    
    
      3
      4
      1
      1
      Futrelle, Mrs. Jacques Heath (Lily May Peel)
      female
      35.0
      1
      0
      113803
      53.1000
      C123
      S
    
    
      4
      5
      0
      3
      Allen, Mr. William Henry
      male
      35.0
      0
      0
      373450
      8.0500
      NaN
      S



In [45]:

    
age = titanic_survival['Age']
#print age.loc[0:10]
age_is_null = pd.isnull(age)
#print age_is_null
age_is_true = age[age_is_null]
#print age_is_true
age_is_count = len(age_is_true)
#print age_is_count



In [39]:

    
mean_age = sum(titanic_survival['Age'])/len(titanic_survival['Age'])
print mean_age

nan



In [42]:

    
good_age = titanic_survival['Age'][age_is_null == False]
#print good_age
correct_mean_age = sum(good_age)/len(good_age)
print correct_mean_age









    



29.6991176471



In [44]:

    
correct_mean_age_test = titanic_survival['Age'].mean()
print correct_mean_age_test









    



29.6991176471



In [46]:

    
passenger_class = [1,2,3]
meanfare_by_class = {}

for m in passenger_class:
    pclass_row = titanic_survival[titanic_survival['Pclass'] == m]
    pclass_fare = pclass_row['Fare']
    meanfare_by_class_m = pclass_fare.mean()
    meanfare_by_class[m] = meanfare_by_class_m
print meanfare_by_class









    



{1: 84.15468749999992, 2: 20.66218315217391, 3: 13.675550101832997}



In [48]:

    
passenger_mean_survival = titanic_survival.pivot_table(index='Pclass',values='Survived',aggfunc=np.mean)
print passenger_mean_survival









    



Pclass
1    0.629630
2    0.472826
3    0.242363
Name: Survived, dtype: float64



In [52]:

    
passenger_age = titanic_survival.pivot_table(index='Pclass',values='Age')
print passenger_age









    



Pclass
1    38.233441
2    29.877630
3    25.140620
Name: Age, dtype: float64



In [53]:

    
port_status = titanic_survival.pivot_table(index='Embarked',values=['Fare','Survived'],aggfunc=np.sum)
print port_status









    



                Fare  Survived
Embarked                      
C         10072.2962        93
Q          1022.2543        30
S         17439.3988       217



In [55]:

    
new_titanic_survival = titanic_survival.dropna(axis=0,subset=['Age','Sex'])
#print new_titanic_survival



In [57]:

    
row_index_83_age = titanic_survival.loc[83,'Age']
row_index_766_pcalss = titanic_survival.loc[766,'Pclass']
print row_index_83_age
print row_index_766_pcalss



In [62]:

    
sort_titanic_survival = titanic_survival.sort_values('Age',ascending=False)
print sort_titanic_survival.iloc[0:10]
print '-----------------------------'
sort_titanic_survival_reindex = sort_titanic_survival.reset_index(drop = True)
print sort_titanic_survival_reindex.loc[0:10]









    



     PassengerId  Survived  Pclass                                  Name  \
630          631         1       1  Barkworth, Mr. Algernon Henry Wilson   
851          852         0       3                   Svensson, Mr. Johan   
493          494         0       1               Artagaveytia, Mr. Ramon   
96            97         0       1             Goldschmidt, Mr. George B   
116          117         0       3                  Connors, Mr. Patrick   
672          673         0       2           Mitchell, Mr. Henry Michael   
745          746         0       1          Crosby, Capt. Edward Gifford   
33            34         0       2                 Wheadon, Mr. Edward H   
54            55         0       1        Ostby, Mr. Engelhart Cornelius   
280          281         0       3                      Duane, Mr. Frank   

      Sex   Age  SibSp  Parch      Ticket     Fare Cabin Embarked  
630  male  80.0      0      0       27042  30.0000   A23        S  
851  male  74.0      0      0      347060   7.7750   NaN        S  
493  male  71.0      0      0    PC 17609  49.5042   NaN        C  
96   male  71.0      0      0    PC 17754  34.6542    A5        C  
116  male  70.5      0      0      370369   7.7500   NaN        Q  
672  male  70.0      0      0  C.A. 24580  10.5000   NaN        S  
745  male  70.0      1      1   WE/P 5735  71.0000   B22        S  
33   male  66.0      0      0  C.A. 24579  10.5000   NaN        S  
54   male  65.0      0      1      113509  61.9792   B30        C  
280  male  65.0      0      0      336439   7.7500   NaN        Q  
-----------------------------
    PassengerId  Survived  Pclass                                  Name   Sex  \
0           631         1       1  Barkworth, Mr. Algernon Henry Wilson  male   
1           852         0       3                   Svensson, Mr. Johan  male   
2           494         0       1               Artagaveytia, Mr. Ramon  male   
3            97         0       1             Goldschmidt, Mr. George B  male   
4           117         0       3                  Connors, Mr. Patrick  male   
5           673         0       2           Mitchell, Mr. Henry Michael  male   
6           746         0       1          Crosby, Capt. Edward Gifford  male   
7            34         0       2                 Wheadon, Mr. Edward H  male   
8            55         0       1        Ostby, Mr. Engelhart Cornelius  male   
9           281         0       3                      Duane, Mr. Frank  male   
10          457         0       1             Millet, Mr. Francis Davis  male   

     Age  SibSp  Parch      Ticket     Fare Cabin Embarked  
0   80.0      0      0       27042  30.0000   A23        S  
1   74.0      0      0      347060   7.7750   NaN        S  
2   71.0      0      0    PC 17609  49.5042   NaN        C  
3   71.0      0      0    PC 17754  34.6542    A5        C  
4   70.5      0      0      370369   7.7500   NaN        Q  
5   70.0      0      0  C.A. 24580  10.5000   NaN        S  
6   70.0      1      1   WE/P 5735  71.0000   B22        S  
7   66.0      0      0  C.A. 24579  10.5000   NaN        S  
8   65.0      0      1      113509  61.9792   B30        C  
9   65.0      0      0      336439   7.7500   NaN        Q  
10  65.0      0      0       13509  26.5500   E38        S



In [63]:

    
def hundredth_row(column):
    hundredth_item = column.loc[99]
    return hundredth_item
hundredth_row = titanic_survival.apply(hundredth_row)
print hundredth_row









    



PassengerId                  100
Survived                       0
Pclass                         2
Name           Kantor, Mr. Sinai
Sex                         male
Age                           34
SibSp                          1
Parch                          0
Ticket                    244367
Fare                          26
Cabin                        NaN
Embarked                       S
dtype: object



In [64]:

    
def isnull_count(column):
    column_null = pd.isnull(column)
    null = column[column_null]
    return len(null)
column_null_count = titanic_survival.apply(isnull_count)
print column_null_count









    



PassengerId      0
Survived         0
Pclass           0
Name             0
Sex              0
Age            177
SibSp            0
Parch            0
Ticket           0
Fare             0
Cabin          687
Embarked         2
dtype: int64



In [71]:

    
import pandas as pd
findango = pd.read_csv('fandango_score_comparison.csv')
series_film = findango['FILM']
print type(series_film)
print type(findango)
print series_film[0:5]
findango.head()
series_rt = findango['RottenTomatoes']
print findango['RottenTomatoes'][0:5]









    



<class 'pandas.core.series.Series'>
<class 'pandas.core.frame.DataFrame'>
0    Avengers: Age of Ultron (2015)
1                 Cinderella (2015)
2                    Ant-Man (2015)
3            Do You Believe? (2015)
4     Hot Tub Time Machine 2 (2015)
Name: FILM, dtype: object
0    74
1    85
2    80
3    18
4    14
Name: RottenTomatoes, dtype: int64



In [86]:

    
from pandas import Series
film_name = series_film.values
print type(film_name)
#print film_name
film_sorce = series_rt.values
series_custom = Series(film_sorce,index=film_name)
series_custom[['Avengers: Age of Ultron (2015)','Cinderella (2015)']]
#series_custom = Series(film_name,index=film_sorce)
series_custom[5:10]









    



<type 'numpy.ndarray'>






    Out[86]:





The Water Diviner (2015)        63
Irrational Man (2015)           42
Top Five (2014)                 86
Shaun the Sheep Movie (2015)    99
Love & Mercy (2015)             89
dtype: int64



In [114]:

    
original_index = series_custom.index.tolist()
print type(series_custom)
#print original_index
print type(original_index)
sort_index = sorted(original_index)
#print series_custom[sort_index]
print sort_index.index
sort_by_index = series_custom.reindex(sort_index)
print sort_by_index.index









    



<class 'pandas.core.series.Series'>
<type 'list'>
<built-in method index of list object at 0x10d6cd710>
Index([u''71 (2015)', u'5 Flights Up (2015)', u'A Little Chaos (2015)',
       u'A Most Violent Year (2014)', u'About Elly (2015)', u'Aloha (2015)',
       u'American Sniper (2015)', u'American Ultra (2015)', u'Amy (2015)',
       u'Annie (2014)',
       ...
       u'Unbroken (2014)', u'Unfinished Business (2015)', u'Unfriended (2015)',
       u'Vacation (2015)', u'Welcome to Me (2015)',
       u'What We Do in the Shadows (2015)', u'When Marnie Was There (2015)',
       u'While We're Young (2015)', u'Wild Tales (2014)',
       u'Woman in Gold (2015)'],
      dtype='object', length=146)



In [105]:

    
#print np.add(sort_by_index,sort_by_index)
#print np.sin(sort_by_index)
print np.max(sort_by_index)



In [108]:

    
series_greater_than_50 = series_custom[series_custom>50]
#print series_greater_than_50
where_1 = series_custom>90
where_2 = series_custom<100
both_series = series_custom[where_1 & where_2]
print both_series









    



Shaun the Sheep Movie (2015)                   99
Leviathan (2014)                               99
Selma (2014)                                   99
Ex Machina (2015)                              92
Wild Tales (2014)                              96
The End of the Tour (2015)                     92
Red Army (2015)                                96
The Hunting Ground (2015)                      92
I'll See You In My Dreams (2015)               94
Timbuktu (2015)                                99
About Elly (2015)                              97
The Diary of a Teenage Girl (2015)             95
Birdman (2014)                                 92
The Gift (2015)                                93
Monkey Kingdom (2015)                          94
Mr. Turner (2014)                              98
The Wrecking Crew (2015)                       93
Mad Max: Fury Road (2015)                      97
Spy (2015)                                     93
Paddington (2015)                              98
What We Do in the Shadows (2015)               96
The Salt of the Earth (2015)                   96
Song of the Sea (2014)                         99
It Follows (2015)                              96
Phoenix (2015)                                 99
Tangerine (2015)                               95
Mission: Impossible â€“ Rogue Nation (2015)    92
Amy (2015)                                     97
Inside Out (2015)                              98
'71 (2015)                                     97
Two Days, One Night (2014)                     97
dtype: int64



In [111]:

    
rt_critics = Series(findango['RottenTomatoes'].values,index=findango['FILM'])
rt_user = Series(findango['RottenTomatoes_User'].values,index=findango['FILM'])
rt_mean = (rt_critics + rt_user) / 2
print rt_mean









    



FILM
Avengers: Age of Ultron (2015)                    80.0
Cinderella (2015)                                 82.5
Ant-Man (2015)                                    85.0
Do You Believe? (2015)                            51.0
Hot Tub Time Machine 2 (2015)                     21.0
The Water Diviner (2015)                          62.5
Irrational Man (2015)                             47.5
Top Five (2014)                                   75.0
Shaun the Sheep Movie (2015)                      90.5
Love & Mercy (2015)                               88.0
Far From The Madding Crowd (2015)                 80.5
Black Sea (2015)                                  71.0
Leviathan (2014)                                  89.0
Unbroken (2014)                                   60.5
The Imitation Game (2014)                         91.0
Taken 3 (2015)                                    27.5
Ted 2 (2015)                                      52.0
Southpaw (2015)                                   69.5
Night at the Museum: Secret of the Tomb (2014)    54.0
Pixels (2015)                                     35.5
McFarland, USA (2015)                             84.0
Insidious: Chapter 3 (2015)                       57.5
The Man From U.N.C.L.E. (2015)                    74.0
Run All Night (2015)                              59.5
Trainwreck (2015)                                 79.5
Selma (2014)                                      92.5
Ex Machina (2015)                                 89.0
Still Alice (2015)                                86.5
Wild Tales (2014)                                 94.0
The End of the Tour (2015)                        90.5
                                                  ... 
Clouds of Sils Maria (2015)                       78.0
Testament of Youth (2015)                         80.0
Infinitely Polar Bear (2015)                      78.0
Phoenix (2015)                                    90.0
The Wolfpack (2015)                               78.5
The Stanford Prison Experiment (2015)             85.5
Tangerine (2015)                                  90.5
Magic Mike XXL (2015)                             63.0
Home (2015)                                       55.0
The Wedding Ringer (2015)                         46.5
Woman in Gold (2015)                              66.5
The Last Five Years (2015)                        60.0
Mission: Impossible â€“ Rogue Nation (2015)       91.0
Amy (2015)                                        94.0
Jurassic World (2015)                             76.0
Minions (2015)                                    53.0
Max (2015)                                        54.0
Paul Blart: Mall Cop 2 (2015)                     20.5
The Longest Ride (2015)                           52.0
The Lazarus Effect (2015)                         18.5
The Woman In Black 2 Angel of Death (2015)        23.5
Danny Collins (2015)                              76.0
Spare Parts (2015)                                67.5
Serena (2015)                                     21.5
Inside Out (2015)                                 94.0
Mr. Holmes (2015)                                 82.5
'71 (2015)                                        89.5
Two Days, One Night (2014)                        87.5
Gett: The Trial of Viviane Amsalem (2015)         90.5
Kumiko, The Treasure Hunter (2015)                75.0
dtype: float64



In [113]:

    
fandango = pd.read_csv('fandango_score_comparison.csv')
print type(fandango)
fandango_films = fandango.set_index('FILM',drop=False)
print fandango_films.index









    



<class 'pandas.core.frame.DataFrame'>
Index([u'Avengers: Age of Ultron (2015)', u'Cinderella (2015)',
       u'Ant-Man (2015)', u'Do You Believe? (2015)',
       u'Hot Tub Time Machine 2 (2015)', u'The Water Diviner (2015)',
       u'Irrational Man (2015)', u'Top Five (2014)',
       u'Shaun the Sheep Movie (2015)', u'Love & Mercy (2015)',
       ...
       u'The Woman In Black 2 Angel of Death (2015)', u'Danny Collins (2015)',
       u'Spare Parts (2015)', u'Serena (2015)', u'Inside Out (2015)',
       u'Mr. Holmes (2015)', u''71 (2015)', u'Two Days, One Night (2014)',
       u'Gett: The Trial of Viviane Amsalem (2015)',
       u'Kumiko, The Treasure Hunter (2015)'],
      dtype='object', name=u'FILM', length=146)



In [122]:

    
#print fandango_films.dtypes
types = fandango_films.dtypes
float_types = types[types.values == 'float64'].index
float_column = fandango_films[float_types]
#print float_column
deviations = float_column.apply(lambda x: np.std(x))
print deviations









    



Metacritic_User               1.505529
IMDB                          0.955447
Fandango_Stars                0.538532
Fandango_Ratingvalue          0.501106
RT_norm                       1.503265
RT_user_norm                  0.997787
Metacritic_norm               0.972522
Metacritic_user_nom           0.752765
IMDB_norm                     0.477723
RT_norm_round                 1.509404
RT_user_norm_round            1.003559
Metacritic_norm_round         0.987561
Metacritic_user_norm_round    0.785412
IMDB_norm_round               0.501043
Fandango_Difference           0.152141
dtype: float64



In [124]:

    
rt_mt_user = float_column[['RT_user_norm','Metacritic_user_nom']]
rt_mt_user.apply(lambda x : np.std(x),axis = 1)









    Out[124]:





FILM
Avengers: Age of Ultron (2015)                    0.375
Cinderella (2015)                                 0.125
Ant-Man (2015)                                    0.225
Do You Believe? (2015)                            0.925
Hot Tub Time Machine 2 (2015)                     0.150
The Water Diviner (2015)                          0.150
Irrational Man (2015)                             0.575
Top Five (2014)                                   0.100
Shaun the Sheep Movie (2015)                      0.150
Love & Mercy (2015)                               0.050
Far From The Madding Crowd (2015)                 0.050
Black Sea (2015)                                  0.150
Leviathan (2014)                                  0.175
Unbroken (2014)                                   0.125
The Imitation Game (2014)                         0.250
Taken 3 (2015)                                    0.000
Ted 2 (2015)                                      0.175
Southpaw (2015)                                   0.050
Night at the Museum: Secret of the Tomb (2014)    0.000
Pixels (2015)                                     0.025
McFarland, USA (2015)                             0.425
Insidious: Chapter 3 (2015)                       0.325
The Man From U.N.C.L.E. (2015)                    0.025
Run All Night (2015)                              0.350
Trainwreck (2015)                                 0.350
Selma (2014)                                      0.375
Ex Machina (2015)                                 0.175
Still Alice (2015)                                0.175
Wild Tales (2014)                                 0.100
The End of the Tour (2015)                        0.350
                                                  ...  
Clouds of Sils Maria (2015)                       0.100
Testament of Youth (2015)                         0.000
Infinitely Polar Bear (2015)                      0.075
Phoenix (2015)                                    0.025
The Wolfpack (2015)                               0.075
The Stanford Prison Experiment (2015)             0.050
Tangerine (2015)                                  0.325
Magic Mike XXL (2015)                             0.250
Home (2015)                                       0.200
The Wedding Ringer (2015)                         0.825
Woman in Gold (2015)                              0.225
The Last Five Years (2015)                        0.225
Mission: Impossible â€“ Rogue Nation (2015)       0.250
Amy (2015)                                        0.075
Jurassic World (2015)                             0.275
Minions (2015)                                    0.125
Max (2015)                                        0.350
Paul Blart: Mall Cop 2 (2015)                     0.300
The Longest Ride (2015)                           0.625
The Lazarus Effect (2015)                         0.650
The Woman In Black 2 Angel of Death (2015)        0.475
Danny Collins (2015)                              0.100
Spare Parts (2015)                                0.300
Serena (2015)                                     0.700
Inside Out (2015)                                 0.025
Mr. Holmes (2015)                                 0.025
'71 (2015)                                        0.175
Two Days, One Night (2014)                        0.250
Gett: The Trial of Viviane Amsalem (2015)         0.200
Kumiko, The Treasure Hunter (2015)                0.025
dtype: float64



In [ ]:

	PassengerId	Survived	Pclass	Name	Sex	Age	SibSp	Ticket	Fare	Cabin	Embarked
0	1	0	3	Braund, Mr. Owen Harris	male	22.0	1	A/5 21171	7.2500	NaN	S
1	2	1	1	Cumings, Mrs. John Bradley (Florence Briggs Th...	female	38.0	1	PC 17599	71.2833	C85	C
2	3	1	3	Heikkinen, Miss. Laina	female	26.0	0	STON/O2. 3101282	7.9250	NaN	S
3	4	1	1	Futrelle, Mrs. Jacques Heath (Lily May Peel)	female	35.0	1	113803	53.1000	C123	S
4	5	0	3	Allen, Mr. William Henry	male	35.0	0	373450	8.0500	NaN	S