Data exploration for dashboard assignment


In [2]:
from __future__ import print_function

import pandas as pd
# Read the space-separated cereal table. Column names are supplied through
# `names`, so pandas treats the first line of the file as data, not a header.
# NOTE(review): several numeric cells in the displayed frame hold -1 (seen in
# potass, cups, carbo and sugars) -- presumably a missing-value marker; confirm
# against the dataset description before aggregating those columns.
df = pd.read_csv(
    "cereals/cereal.csv",
    sep=" ",
    names=[
        "name", "mfr", "type",
        "calories", "protein", "fat", "sodium", "fiber", "carbo", "sugars",
        "shelf", "potass", "vitamins", "weight", "cups",
    ],
)
df  # bare last expression: the notebook renders the frame as the cell output


Out[2]:
name mfr type calories protein fat sodium fiber carbo sugars shelf potass vitamins weight cups
0 100%_Bran N C 70 4 1 130 10.0 5.0 6 3 280 25 1.00 0.33
1 100%_Natural_Bran Q C 120 3 5 15 2.0 8.0 8 3 135 0 1.00 -1.00
2 All-Bran K C 70 4 1 260 9.0 7.0 5 3 320 25 1.00 0.33
3 All-Bran_with_Extra_Fiber K C 50 4 0 140 14.0 8.0 0 3 330 25 1.00 0.50
4 Almond_Delight R C 110 2 2 200 1.0 14.0 8 3 -1 25 1.00 0.75
5 Apple_Cinnamon_Cheerios G C 110 2 2 180 1.5 10.5 10 1 70 25 1.00 0.75
6 Apple_Jacks K C 110 2 0 125 1.0 11.0 14 2 30 25 1.00 1.00
7 Basic_4 G C 130 3 2 210 2.0 18.0 8 3 100 25 1.33 0.75
8 Bran_Chex R C 90 2 1 200 4.0 15.0 6 1 125 25 1.00 0.67
9 Bran_Flakes P C 90 3 0 210 5.0 13.0 5 3 190 25 1.00 0.67
10 Cap'n'Crunch Q C 120 1 2 220 0.0 12.0 12 2 35 25 1.00 0.75
11 Cheerios G C 110 6 2 290 2.0 17.0 1 1 105 25 1.00 1.25
12 Cinnamon_Toast_Crunch G C 120 1 3 210 0.0 13.0 9 2 45 25 1.00 0.75
13 Clusters G C 110 3 2 140 2.0 13.0 7 3 105 25 1.00 0.50
14 Cocoa_Puffs G C 110 1 1 180 0.0 12.0 13 2 55 25 1.00 1.00
15 Corn_Chex R C 110 2 0 280 0.0 22.0 3 1 25 25 1.00 1.00
16 Corn_Flakes K C 100 2 0 290 1.0 21.0 2 1 35 25 1.00 1.00
17 Corn_Pops K C 110 1 0 90 1.0 13.0 12 2 20 25 1.00 1.00
18 Count_Chocula G C 110 1 1 180 0.0 12.0 13 2 65 25 1.00 1.00
19 Cracklin'_Oat_Bran K C 110 3 3 140 4.0 10.0 7 3 160 25 1.00 0.50
20 Cream_of_Wheat_(Quick) N H 100 3 0 80 1.0 21.0 0 2 -1 0 1.00 1.00
21 Crispix K C 110 2 0 220 1.0 21.0 3 3 30 25 1.00 1.00
22 Crispy_Wheat_&_Raisins G C 100 2 1 140 2.0 11.0 10 3 120 25 1.00 0.75
23 Double_Chex R C 100 2 0 190 1.0 18.0 5 3 80 25 1.00 0.75
24 Froot_Loops K C 110 2 1 125 1.0 11.0 13 2 30 25 1.00 1.00
25 Frosted_Flakes K C 110 1 0 200 1.0 14.0 11 1 25 25 1.00 0.75
26 Frosted_Mini-Wheats K C 100 3 0 0 3.0 14.0 7 2 100 25 1.00 0.80
27 Fruit_&_Fibre_Dates_Walnuts_and_Oats P C 120 3 2 160 5.0 12.0 10 3 200 25 1.25 0.67
28 Fruitful_Bran K C 120 3 0 240 5.0 14.0 12 3 190 25 1.33 0.67
29 Fruity_Pebbles P C 110 1 1 135 0.0 13.0 12 2 25 25 1.00 0.75
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
47 Multi-Grain_Cheerios G C 100 2 1 220 2.0 15.0 6 1 90 25 1.00 1.00
48 Nut&Honey_Crunch K C 120 2 1 190 0.0 15.0 9 2 40 25 1.00 0.67
49 Nutri-Grain_Almond-Raisin K C 140 3 2 220 3.0 21.0 7 3 130 25 1.33 0.67
50 Nutri-grain_Wheat K C 90 3 0 170 3.0 18.0 2 3 90 25 1.00 -1.00
51 Oatmeal_Raisin_Crisp G C 130 3 2 170 1.5 13.5 10 3 120 25 1.25 0.50
52 Post_Nat._Raisin_Bran P C 120 3 1 200 6.0 11.0 14 3 260 25 1.33 0.67
53 Product_19 K C 100 3 0 320 1.0 20.0 3 3 45 100 1.00 1.00
54 Puffed_Rice Q C 50 1 0 0 0.0 13.0 0 3 15 0 0.50 1.00
55 Puffed_Wheat Q C 50 2 0 0 1.0 10.0 0 3 50 0 0.50 -1.00
56 Quaker_Oat_Squares Q C 100 4 1 135 2.0 14.0 6 3 110 25 1.00 0.50
57 Quaker_Oatmeal Q H 100 5 2 0 2.7 -1.0 -1 1 110 0 1.00 0.67
58 Raisin_Bran K C 120 3 1 210 5.0 14.0 12 2 240 25 1.33 0.75
59 Raisin_Nut_Bran G C 100 3 2 140 2.5 10.5 8 3 140 25 1.00 0.50
60 Raisin_Squares K C 90 2 0 0 2.0 15.0 6 3 110 25 1.00 0.50
61 Rice_Chex R C 110 1 0 240 0.0 23.0 2 1 30 25 1.00 1.13
62 Rice_Krispies K C 110 2 0 290 0.0 22.0 3 1 35 25 1.00 1.00
63 Shredded_Wheat N C 80 2 0 0 3.0 16.0 0 1 95 0 0.83 -1.00
64 Shredded_Wheat_'n'Bran N C 90 3 0 0 4.0 19.0 0 1 140 0 1.00 0.67
65 Shredded_Wheat_spoon_size N C 90 3 0 0 3.0 20.0 0 1 120 0 1.00 0.67
66 Smacks K C 110 2 1 70 1.0 9.0 15 2 40 25 1.00 0.75
67 Special_K K C 110 6 0 230 1.0 16.0 3 1 55 25 1.00 1.00
68 Strawberry_Fruit_Wheats N C 90 2 0 15 3.0 15.0 5 2 90 25 1.00 -1.00
69 Total_Corn_Flakes G C 110 2 1 200 0.0 21.0 3 3 35 100 1.00 1.00
70 Total_Raisin_Bran G C 140 3 1 190 4.0 15.0 14 3 230 100 1.50 1.00
71 Total_Whole_Grain G C 100 3 1 200 3.0 16.0 3 3 110 100 1.00 1.00
72 Triples G C 110 2 1 250 0.0 21.0 3 3 60 25 1.00 0.75
73 Trix G C 110 1 1 140 0.0 13.0 12 2 25 25 1.00 1.00
74 Wheat_Chex R C 100 3 1 230 3.0 17.0 3 1 115 25 1.00 0.67
75 Wheaties G C 100 3 1 200 3.0 17.0 3 1 110 25 1.00 1.00
76 Wheaties_Honey_Gold G C 110 2 1 200 1.0 16.0 8 1 60 25 1.00 0.75

77 rows × 15 columns


In [30]:
# Overall mean calories and sugars across every row of `df`.
# Series.mean() replaces the hand-rolled sum(col) / float(len(df)): it is
# vectorised and yields NaN instead of raising ZeroDivisionError on an empty
# frame. (mean() also skips NaN cells, which the manual sum would propagate.)
# NOTE(review): the sugars column contains a -1 entry (Quaker_Oatmeal) that
# looks like a missing-value marker; it is still included here, as before, and
# pulls the sugar average down slightly -- confirm and filter if appropriate.
print("\nAverage Calories in ALL cereals:", df["calories"].mean(),
      "\nAverage Sugar in ALL cereals:", df["sugars"].mean())


Average Calories in ALL cereals: 106.883116883 
Average Sugar in ALL cereals: 6.92207792208

In [38]:
from __future__ import print_function
# Manufacturer code (value of the "mfr" column) -> full brand name.
mfr = {"A": "American Home Food Products", "G": "General Mills", "K": "Kelloggs",
       "N": "Nabisco", "P": "Post", "Q": "Quaker Oats", "R": "Ralston Purina"}

sugar_brand = {}     # brand name -> mean of "sugars" over that brand's rows
calories_brand = {}  # brand name -> mean of "calories" over that brand's rows

# dict.items() works on both Python 2 and 3 (iterkeys() exists only on 2) and
# visits the entries in the same order as iterating the keys.
for code, brand in mfr.items():
    brand_df = df[df["mfr"] == code]
    # Series.mean() instead of sum(...) / float(len(...)): same value for these
    # columns, and NaN rather than ZeroDivisionError for a brand with no rows.
    # NOTE(review): a -1 in "sugars" (Quaker_Oatmeal) looks like a missing-value
    # marker and is still averaged in, as before -- confirm and filter if needed.
    sugar_brand[brand] = brand_df["sugars"].mean()
    calories_brand[brand] = brand_df["calories"].mean()
    print("\n========\nBrand: ", brand,
          "\nAverage Sugar", sugar_brand[brand],
          "\nAverage Calories", calories_brand[brand],
          "\nProducts: ", len(brand_df), "\n", brand_df["name"])


========
Brand:  American Home Food Products 
Average Sugar 3.0 
Average Calories 100.0 
Products:  1 
 43    Maypo
Name: name, dtype: object

========
Brand:  General Mills 
Average Sugar 7.95454545455 
Average Calories 111.363636364 
Products:  22 
 5     Apple_Cinnamon_Cheerios
7                     Basic_4
11                   Cheerios
12      Cinnamon_Toast_Crunch
13                   Clusters
14                Cocoa_Puffs
18              Count_Chocula
22     Crispy_Wheat_&_Raisins
31             Golden_Grahams
36         Honey_Nut_Cheerios
40                        Kix
42               Lucky_Charms
47       Multi-Grain_Cheerios
51       Oatmeal_Raisin_Crisp
59            Raisin_Nut_Bran
69          Total_Corn_Flakes
70          Total_Raisin_Bran
71          Total_Whole_Grain
72                    Triples
73                       Trix
75                   Wheaties
76        Wheaties_Honey_Gold
Name: name, dtype: object

========
Brand:  Kelloggs 
Average Sugar 7.5652173913 
Average Calories 108.695652174 
Products:  23 
 2                        All-Bran
3       All-Bran_with_Extra_Fiber
6                     Apple_Jacks
16                    Corn_Flakes
17                      Corn_Pops
19             Cracklin'_Oat_Bran
21                        Crispix
24                    Froot_Loops
25                 Frosted_Flakes
26            Frosted_Mini-Wheats
28                  Fruitful_Bran
38    Just_Right_Crunchy__Nuggets
39         Just_Right_Fruit_&_Nut
46           Mueslix_Crispy_Blend
48               Nut&Honey_Crunch
49      Nutri-Grain_Almond-Raisin
50              Nutri-grain_Wheat
53                     Product_19
58                    Raisin_Bran
60                 Raisin_Squares
62                  Rice_Krispies
66                         Smacks
67                      Special_K
Name: name, dtype: object

========
Brand:  Nabisco 
Average Sugar 1.83333333333 
Average Calories 86.6666666667 
Products:  6 
 0                     100%_Bran
20       Cream_of_Wheat_(Quick)
63               Shredded_Wheat
64       Shredded_Wheat_'n'Bran
65    Shredded_Wheat_spoon_size
68      Strawberry_Fruit_Wheats
Name: name, dtype: object

========
Brand:  Quaker Oats 
Average Sugar 5.25 
Average Calories 95.0 
Products:  8 
 1      100%_Natural_Bran
10          Cap'n'Crunch
35      Honey_Graham_Ohs
41                  Life
54           Puffed_Rice
55          Puffed_Wheat
56    Quaker_Oat_Squares
57        Quaker_Oatmeal
Name: name, dtype: object

========
Brand:  Post 
Average Sugar 8.77777777778 
Average Calories 108.888888889 
Products:  9 
 9                              Bran_Flakes
27    Fruit_&_Fibre_Dates_Walnuts_and_Oats
29                          Fruity_Pebbles
30                            Golden_Crisp
32                       Grape_Nuts_Flakes
33                              Grape-Nuts
34                      Great_Grains_Pecan
37                              Honey-comb
52                   Post_Nat._Raisin_Bran
Name: name, dtype: object

========
Brand:  Ralston Purina 
Average Sugar 6.125 
Average Calories 115.0 
Products:  8 
 4                      Almond_Delight
8                           Bran_Chex
15                          Corn_Chex
23                        Double_Chex
44     Muesli_Raisins_Dates_&_Almonds
45    Muesli_Raisins_Peaches_&_Pecans
61                          Rice_Chex
74                         Wheat_Chex
Name: name, dtype: object

In [46]:
import json
def _brands_with_lowest(averages):
    """Return every brand whose average equals the minimum of ``averages``.

    ``averages`` maps brand name -> numeric average. All tied brands are
    returned, in the mapping's iteration order; an empty mapping gives ``[]``.
    """
    if not averages:
        return []
    lowest = min(averages.values())  # computed once, not once per brand
    return [brand for brand, value in averages.items() if value == lowest]


# .items()/.values() replace the Python-2-only iteritems()/itervalues(), so the
# cell runs on both Python 2 and 3. The brand(s) with the lowest average are
# the ones reported under "Healthy".
print("Sugar in Brands:\n", json.dumps(sugar_brand, indent=2),
      "\nHealthy:", _brands_with_lowest(sugar_brand))
print("\nCalories in Brands:\n", json.dumps(calories_brand, indent=2),
      "\nHealthy:", _brands_with_lowest(calories_brand))


Sugar in Brands:
 {
  "Kelloggs": 7.5652173913043477, 
  "Ralston Purina": 6.125, 
  "Nabisco": 1.8333333333333333, 
  "Quaker Oats": 5.25, 
  "American Home Food Products": 3.0, 
  "Post": 8.7777777777777786, 
  "General Mills": 7.9545454545454541
} 
Healthy: ['Nabisco']

Calories in Brands:
 {
  "Kelloggs": 108.69565217391305, 
  "Ralston Purina": 115.0, 
  "Nabisco": 86.666666666666671, 
  "Quaker Oats": 95.0, 
  "American Home Food Products": 100.0, 
  "Post": 108.88888888888889, 
  "General Mills": 111.36363636363636
} 
Healthy: ['Nabisco']

In [32]:
class PDF(object):
    """Wrapper that lets a notebook display a PDF file as rich output.

    The object exposes ``_repr_html_`` and ``_repr_latex_``, so leaving an
    instance as the last expression of a cell embeds the document.

    Parameters
    ----------
    pdf : str
        Path or URL of the PDF; inserted verbatim into the generated markup.
    size : tuple
        ``(width, height)`` used for the HTML iframe. Defaults to (200, 200).
    """

    def __init__(self, pdf, size=(200,200)):
        # These two assignments must be indented under __init__; left at the
        # `def` level they make the class body a syntax error.
        self.pdf = pdf
        self.size = size

    def _repr_html_(self):
        """Return an ``<iframe>`` pointing at the PDF, sized from ``self.size``."""
        return '<iframe src={0} width={1[0]} height={1[1]}></iframe>'.format(self.pdf, self.size)

    def _repr_latex_(self):
        """Return a LaTeX ``\\includegraphics`` command for the PDF."""
        return r'\includegraphics[width=1.0\textwidth]{{{0}}}'.format(self.pdf)

In [36]:
# Caption, then the PDF wrapper as the cell's final expression so the notebook
# shows it through the object's _repr_html_ / _repr_latex_ hooks.
print("Sugar Study")
PDF(pdf="Sugar_Cereal_Study.pdf", size=(800, 600))


Sugar Study
Out[36]:

In [ ]:
# To save df or a sub-DataFrame, just call df.to_csv("name")