notebook.community

Edit and run



In [1]:

    
import pandas
# Load the nutrient table into a DataFrame. The path is relative, so the CSV
# must be reachable from the kernel's current working directory.
food_info = pandas.read_csv("food_info.csv")
# Show the dtype pandas inferred for each column.
print(food_info.dtypes)









    



NDB_No               int64
Shrt_Desc           object
Water_(g)          float64
Energ_Kcal           int64
Protein_(g)        float64
Lipid_Tot_(g)      float64
Ash_(g)            float64
Carbohydrt_(g)     float64
Fiber_TD_(g)       float64
Sugar_Tot_(g)      float64
Calcium_(mg)       float64
Iron_(mg)          float64
Magnesium_(mg)     float64
Phosphorus_(mg)    float64
Potassium_(mg)     float64
Sodium_(mg)        float64
Zinc_(mg)          float64
Copper_(mg)        float64
Manganese_(mg)     float64
Selenium_(mcg)     float64
Vit_C_(mg)         float64
Thiamin_(mg)       float64
Riboflavin_(mg)    float64
Niacin_(mg)        float64
Vit_B6_(mg)        float64
Vit_B12_(mcg)      float64
Vit_A_IU           float64
Vit_A_RAE          float64
Vit_E_(mg)         float64
Vit_D_mcg          float64
Vit_D_IU           float64
Vit_K_(mcg)        float64
FA_Sat_(g)         float64
FA_Mono_(g)        float64
FA_Poly_(g)        float64
Cholestrl_(mg)     float64
dtype: object



In [3]:

    
# Keep the first rows of the table for a quick visual sanity check
# (DataFrame.head() returns 5 rows when called without an argument).
first_rows = food_info.head()

# Other quick inspections, left disabled so the cell shows a single result:
#print(food_info.head(3))   # only the first 3 rows
#print(food_info.columns)   # column labels
#print(food_info.shape)     # (n_rows, n_columns)

# A bare final expression is what the notebook renders as the cell's Out[...]
# table; with it commented out, re-running the cell displayed nothing.
first_rows









    Out[3]:






  
    
      
      NDB_No
      Shrt_Desc
      Water_(g)
      Energ_Kcal
      Protein_(g)
      Lipid_Tot_(g)
      Ash_(g)
      Carbohydrt_(g)
      Fiber_TD_(g)
      Sugar_Tot_(g)
      ...
      Vit_A_IU
      Vit_A_RAE
      Vit_E_(mg)
      Vit_D_mcg
      Vit_D_IU
      Vit_K_(mcg)
      FA_Sat_(g)
      FA_Mono_(g)
      FA_Poly_(g)
      Cholestrl_(mg)
    
  
  
    
      0
      1001
      BUTTER WITH SALT
      15.87
      717
      0.85
      81.11
      2.11
      0.06
      0.0
      0.06
      ...
      2499.0
      684.0
      2.32
      1.5
      60.0
      7.0
      51.368
      21.021
      3.043
      215.0
    
    
      1
      1002
      BUTTER WHIPPED WITH SALT
      15.87
      717
      0.85
      81.11
      2.11
      0.06
      0.0
      0.06
      ...
      2499.0
      684.0
      2.32
      1.5
      60.0
      7.0
      50.489
      23.426
      3.012
      219.0
    
    
      2
      1003
      BUTTER OIL ANHYDROUS
      0.24
      876
      0.28
      99.48
      0.00
      0.00
      0.0
      0.00
      ...
      3069.0
      840.0
      2.80
      1.8
      73.0
      8.6
      61.924
      28.732
      3.694
      256.0
    
    
      3
      1004
      CHEESE BLUE
      42.41
      353
      21.40
      28.74
      5.11
      2.34
      0.0
      0.50
      ...
      721.0
      198.0
      0.25
      0.5
      21.0
      2.4
      18.669
      7.778
      0.800
      75.0
    
    
      4
      1005
      CHEESE BRICK
      41.11
      371
      23.24
      29.68
      3.18
      2.79
      0.0
      0.51
      ...
      1080.0
      292.0
      0.26
      0.5
      22.0
      2.5
      18.764
      8.598
      0.784
      94.0
    
  

5 rows × 36 columns



In [5]:

    
#pandas uses zero-indexing
#Series object representing the row at index 0.
#print (food_info.loc[0])

# Series object representing the seventh row.
#food_info.loc[6]

# Will throw an error: "KeyError: 'the label [8620] is not in the [index]'"
#food_info.loc[8620]
# Note: pandas reports string columns (e.g. Shrt_Desc) with the object dtype.



In [25]:

    
#object - For string values
#int - For integer values
#float - For float values
#datetime - For time values
#bool - For Boolean values
#print(food_info.dtypes)



In [44]:

    
# Label-based row selection with .loc.

# A label slice is inclusive on both ends, so this returns a DataFrame
# containing the rows at indexes 3, 4, 5, and 6.
#food_info.loc[3:6]

# Arbitrary rows are selected by passing a list of labels. Either of the
# following approaches returns the rows at indexes 2, 5, and 10.
# Method 1: name the list first.
#two_five_ten = [2, 5, 10]
#food_info.loc[two_five_ten]

# Method 2: pass the list literal directly. This is kept live as the cell's
# final expression so that re-running the cell actually renders the selected
# rows instead of producing no output.
food_info.loc[[2, 5, 10]]









    Out[44]:






  
    
      
      NDB_No
      Shrt_Desc
      Water_(g)
      Energ_Kcal
      Protein_(g)
      Lipid_Tot_(g)
      Ash_(g)
      Carbohydrt_(g)
      Fiber_TD_(g)
      Sugar_Tot_(g)
      ...
      Vit_A_IU
      Vit_A_RAE
      Vit_E_(mg)
      Vit_D_mcg
      Vit_D_IU
      Vit_K_(mcg)
      FA_Sat_(g)
      FA_Mono_(g)
      FA_Poly_(g)
      Cholestrl_(mg)
    
  
  
    
      2
      1003
      BUTTER OIL ANHYDROUS
      0.24
      876
      0.28
      99.48
      0.00
      0.00
      0.0
      0.00
      ...
      3069.0
      840.0
      2.80
      1.8
      73.0
      8.6
      61.924
      28.732
      3.694
      256.0
    
    
      5
      1006
      CHEESE BRIE
      48.42
      334
      20.75
      27.68
      2.70
      0.45
      0.0
      0.45
      ...
      592.0
      174.0
      0.24
      0.5
      20.0
      2.3
      17.410
      8.013
      0.826
      100.0
    
    
      10
      1011
      CHEESE COLBY
      38.20
      394
      23.76
      32.11
      3.36
      2.57
      0.0
      0.52
      ...
      994.0
      264.0
      0.28
      0.6
      24.0
      2.7
      20.218
      9.280
      0.953
      95.0
    
  

3 rows × 36 columns



In [47]:

    
# Series object representing the "NDB_No" column.
#ndb_col = food_info["NDB_No"]
#print(ndb_col)
# Alternatively, you can access a column by passing in a string variable.
#col_name = "NDB_No"
#ndb_col = food_info[col_name]



In [49]:

    
#columns = ["Zinc_(mg)", "Copper_(mg)"]
#zinc_copper = food_info[columns]
#print(zinc_copper)
# Equivalent one-liner: pass the list of column names directly, skipping the intermediate `columns` variable.
#zinc_copper = food_info[["Zinc_(mg)", "Copper_(mg)"]]



In [54]:

    
# Build a sub-frame holding only the columns measured in grams, i.e. those
# whose label ends with the "(g)" unit suffix.
col_names = food_info.columns.tolist()
gram_columns = [label for label in col_names if label.endswith("(g)")]

# Selecting with a list of labels yields a DataFrame with just those columns.
gram_df = food_info[gram_columns]
print(gram_df.head(3))









    



   Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0      15.87         0.85          81.11     2.11            0.06   
1      15.87         0.85          81.11     2.11            0.06   
2       0.24         0.28          99.48     0.00            0.00   

   Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
0           0.0           0.06      51.368       21.021        3.043  
1           0.0           0.06      50.489       23.426        3.012  
2           0.0           0.00      61.924       28.732        3.694



In [ ]:

	NDB_No	Shrt_Desc	Water_(g)	Energ_Kcal	Protein_(g)	Lipid_Tot_(g)	Ash_(g)	Carbohydrt_(g)	Sugar_Tot_(g)	...	Vit_A_IU	Vit_A_RAE	Vit_E_(mg)	Vit_D_mcg	Vit_D_IU	Vit_K_(mcg)	FA_Sat_(g)	FA_Mono_(g)	FA_Poly_(g)	Cholestrl_(mg)
0	1001	BUTTER WITH SALT	15.87	717	0.85	81.11	2.11	0.06	0.06	...	2499.0	684.0	2.32	1.5	60.0	7.0	51.368	21.021	3.043	215.0
1	1002	BUTTER WHIPPED WITH SALT	15.87	717	0.85	81.11	2.11	0.06	0.06	...	2499.0	684.0	2.32	1.5	60.0	7.0	50.489	23.426	3.012	219.0
2	1003	BUTTER OIL ANHYDROUS	0.24	876	0.28	99.48	0.00	0.00	0.00	...	3069.0	840.0	2.80	1.8	73.0	8.6	61.924	28.732	3.694	256.0
3	1004	CHEESE BLUE	42.41	353	21.40	28.74	5.11	2.34	0.50	...	721.0	198.0	0.25	0.5	21.0	2.4	18.669	7.778	0.800	75.0
4	1005	CHEESE BRICK	41.11	371	23.24	29.68	3.18	2.79	0.51	...	1080.0	292.0	0.26	0.5	22.0	2.5	18.764	8.598	0.784	94.0