In [1]:
import pandas
# Read the nutrition table from the CSV file into a DataFrame.
food_info = pandas.read_csv("food_info.csv")
# To confirm the object type, run: print(type(food_info))
# List every column together with the dtype pandas assigned to it.
print(food_info.dtypes)


NDB_No               int64
Shrt_Desc           object
Water_(g)          float64
Energ_Kcal           int64
Protein_(g)        float64
Lipid_Tot_(g)      float64
Ash_(g)            float64
Carbohydrt_(g)     float64
Fiber_TD_(g)       float64
Sugar_Tot_(g)      float64
Calcium_(mg)       float64
Iron_(mg)          float64
Magnesium_(mg)     float64
Phosphorus_(mg)    float64
Potassium_(mg)     float64
Sodium_(mg)        float64
Zinc_(mg)          float64
Copper_(mg)        float64
Manganese_(mg)     float64
Selenium_(mcg)     float64
Vit_C_(mg)         float64
Thiamin_(mg)       float64
Riboflavin_(mg)    float64
Niacin_(mg)        float64
Vit_B6_(mg)        float64
Vit_B12_(mcg)      float64
Vit_A_IU           float64
Vit_A_RAE          float64
Vit_E_(mg)         float64
Vit_D_mcg          float64
Vit_D_IU           float64
Vit_K_(mcg)        float64
FA_Sat_(g)         float64
FA_Mono_(g)        float64
FA_Poly_(g)        float64
Cholestrl_(mg)     float64
dtype: object

In [3]:
# Grab the first rows for a quick look (DataFrame.head() returns 5 rows by default).
first_rows = food_info.head()
# Keep the frame as the cell's last expression so the notebook renders it;
# with this line commented out the cell produces no Out[] table at all.
first_rows
# Other quick inspections that can be enabled when needed:
#print(food_info.head(3))
#print(food_info.columns)
#print(food_info.shape)


Out[3]:
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) ... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg)
0 1001 BUTTER WITH SALT 15.87 717 0.85 81.11 2.11 0.06 0.0 0.06 ... 2499.0 684.0 2.32 1.5 60.0 7.0 51.368 21.021 3.043 215.0
1 1002 BUTTER WHIPPED WITH SALT 15.87 717 0.85 81.11 2.11 0.06 0.0 0.06 ... 2499.0 684.0 2.32 1.5 60.0 7.0 50.489 23.426 3.012 219.0
2 1003 BUTTER OIL ANHYDROUS 0.24 876 0.28 99.48 0.00 0.00 0.0 0.00 ... 3069.0 840.0 2.80 1.8 73.0 8.6 61.924 28.732 3.694 256.0
3 1004 CHEESE BLUE 42.41 353 21.40 28.74 5.11 2.34 0.0 0.50 ... 721.0 198.0 0.25 0.5 21.0 2.4 18.669 7.778 0.800 75.0
4 1005 CHEESE BRICK 41.11 371 23.24 29.68 3.18 2.79 0.0 0.51 ... 1080.0 292.0 0.26 0.5 22.0 2.5 18.764 8.598 0.784 94.0

5 rows × 36 columns


In [5]:
#pandas uses zero-indexing
#Series object representing the row at index 0.
#print (food_info.loc[0])

# Series object representing the seventh row.
#food_info.loc[6]

# Will throw an error: "KeyError: 'the label [8620] is not in the [index]'"
#food_info.loc[8620]
#The object dtype is what pandas uses for columns of Python strings (e.g. Shrt_Desc)

In [25]:
#object - For string values
#int - For integer values
#float - For float values
#datetime - For time values
#bool - For Boolean values
#print(food_info.dtypes)

In [44]:
# Label-based slicing with .loc includes both endpoints:
# this returns a DataFrame containing the rows at indexes 3, 4, 5, and 6.
#food_info.loc[3:6]

# Select the rows at indexes 2, 5, and 10 by passing a list of labels to .loc.
# Either of the following approaches will work.
# Method 1: keep the labels in a variable first.
#two_five_ten = [2, 5, 10]
#food_info.loc[two_five_ten]

# Method 2: pass the list literal directly. Left active as the cell's last
# expression so the cell actually displays the three selected rows.
food_info.loc[[2, 5, 10]]


Out[44]:
NDB_No Shrt_Desc Water_(g) Energ_Kcal Protein_(g) Lipid_Tot_(g) Ash_(g) Carbohydrt_(g) Fiber_TD_(g) Sugar_Tot_(g) ... Vit_A_IU Vit_A_RAE Vit_E_(mg) Vit_D_mcg Vit_D_IU Vit_K_(mcg) FA_Sat_(g) FA_Mono_(g) FA_Poly_(g) Cholestrl_(mg)
2 1003 BUTTER OIL ANHYDROUS 0.24 876 0.28 99.48 0.00 0.00 0.0 0.00 ... 3069.0 840.0 2.80 1.8 73.0 8.6 61.924 28.732 3.694 256.0
5 1006 CHEESE BRIE 48.42 334 20.75 27.68 2.70 0.45 0.0 0.45 ... 592.0 174.0 0.24 0.5 20.0 2.3 17.410 8.013 0.826 100.0
10 1011 CHEESE COLBY 38.20 394 23.76 32.11 3.36 2.57 0.0 0.52 ... 994.0 264.0 0.28 0.6 24.0 2.7 20.218 9.280 0.953 95.0

3 rows × 36 columns


In [47]:
# Series object representing the "NDB_No" column.
#ndb_col = food_info["NDB_No"]
#print(ndb_col)
# Alternatively, you can access a column by passing in a string variable.
#col_name = "NDB_No"
#ndb_col = food_info[col_name]

In [49]:
#columns = ["Zinc_(mg)", "Copper_(mg)"]
#zinc_copper = food_info[columns]
#print(zinc_copper)
# Skipping the assignment.
#zinc_copper = food_info[["Zinc_(mg)", "Copper_(mg)"]]

In [54]:
# Optional sanity checks while exploring:
#print(food_info.columns)
#print(food_info.head(2))
#print(col_names)

# Collect every column whose name ends with "(g)".
col_names = food_info.columns.tolist()
gram_columns = [name for name in col_names if name.endswith("(g)")]

# Keep only those columns and preview the first three rows.
gram_df = food_info[gram_columns]
print(gram_df.head(3))


   Water_(g)  Protein_(g)  Lipid_Tot_(g)  Ash_(g)  Carbohydrt_(g)  \
0      15.87         0.85          81.11     2.11            0.06   
1      15.87         0.85          81.11     2.11            0.06   
2       0.24         0.28          99.48     0.00            0.00   

   Fiber_TD_(g)  Sugar_Tot_(g)  FA_Sat_(g)  FA_Mono_(g)  FA_Poly_(g)  
0           0.0           0.06      51.368       21.021        3.043  
1           0.0           0.06      50.489       23.426        3.012  
2           0.0           0.00      61.924       28.732        3.694  

In [ ]: