Guided project that analyzes data on Thanksgiving dinner in the US.
In [1]:
import pandas as pd
# Read the survey responses from disk (the file is decoded as Latin-1)
# and preview the first rows.
data = pd.read_csv(
    "thanksgiving.csv",
    encoding="Latin-1",
)
data.head()
Out[1]:
In [2]:
# List every column label in the loaded frame.
data.columns
Out[2]:
In [3]:
# Tally the answers to the "celebrate" question.
celebrate_answers = data["Do you celebrate Thanksgiving?"]
celebrate_answers.value_counts()
Out[3]:
In [4]:
# Keep only the respondents who answered "Yes" to celebrating Thanksgiving.
# `.copy()` makes the filtered frame independent of the frame it was sliced
# from, so the columns assigned to `data` in later cells (`int_age`,
# `int_income`) do not trigger pandas' SettingWithCopyWarning.
celebrates = data["Do you celebrate Thanksgiving?"] == "Yes"
data = data[celebrates].copy()
In [5]:
# Tally the reported main dishes.
main_dish = data["What is typically the main dish at your Thanksgiving dinner?"]
main_dish.value_counts()
Out[5]:
In [6]:
# Gravy answers for the rows whose main dish is "Tofurkey".
main_dish = data["What is typically the main dish at your Thanksgiving dinner?"]
data.loc[main_dish == "Tofurkey", "Do you typically have gravy?"]
Out[6]:
In [7]:
# Tally the non-null entries in the apple pie column.
apple_pie = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Apple"]
apple_pie.value_counts()
Out[7]:
In [8]:
# Tally the non-null entries in the pumpkin pie column.
pumpkin_pie = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pumpkin"]
pumpkin_pie.value_counts()
Out[8]:
In [9]:
# Tally the non-null entries in the pecan pie column.
pecan_pie = data["Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - Pecan"]
pecan_pie.value_counts()
Out[9]:
In [11]:
# Flag respondents who have a non-null entry in at least one of the apple,
# pecan, or pumpkin pie columns.
#
# The previous expression AND-ed the three `isnull` masks, so `ate_pies` was
# True for rows where all three columns were empty -- the opposite of what the
# name says. Checking for any non-null value makes True mean "had pie".
# NOTE(review): presumably a null in a pie column means that pie was not
# selected -- confirm against the survey export.
PIE_QUESTION = "Which type of pie is typically served at your Thanksgiving dinner? Please select all that apply. - "
pie_columns = [PIE_QUESTION + pie for pie in ("Apple", "Pecan", "Pumpkin")]
ate_pies = data[pie_columns].notnull().any(axis=1)
ate_pies.value_counts()
Out[11]:
In [12]:
# Tally the distinct values of the "Age" column.
age_labels = data["Age"]
age_labels.value_counts()
Out[12]:
In [13]:
def extract_age(age_str):
    """Convert an age label to the integer at its start.

    The first whitespace-separated token is taken, any "+" is removed, and
    the remainder is parsed as an integer, e.g. "18 - 29" -> 18 and
    "60+" -> 60. Only the leading number of the label is kept.

    Parameters
    ----------
    age_str : str or missing value
        Age label to parse.

    Returns
    -------
    int or None
        The leading integer, or None when the value is missing (per
        ``pd.isnull``) or contains only whitespace.

    Raises
    ------
    ValueError
        If the first token is not an integer after "+" is removed.
    """
    if pd.isnull(age_str):
        return None
    # Split on any run of whitespace so leading spaces or tabs cannot yield an
    # empty first token (which would make int() raise).
    tokens = age_str.split()
    if not tokens:
        return None
    return int(tokens[0].replace("+", ""))
# Parse every "Age" value with extract_age, store the result as "int_age",
# then summarize the new column.
age_labels = data["Age"]
data["int_age"] = age_labels.apply(extract_age)
data["int_age"].describe()
Out[13]:
In [17]:
def extract_income(income_str):
    """Convert an income label to the integer at its start.

    The first whitespace-separated token is taken, "$" and "," are removed,
    and the remainder is parsed as an integer, e.g.
    "$25,000 to $49,999" -> 25000. Only the leading number of the label is
    kept.

    Parameters
    ----------
    income_str : str or missing value
        Income label to parse.

    Returns
    -------
    int or None
        The leading integer, or None when the value is missing (per
        ``pd.isnull``), contains only whitespace, or starts with the word
        "Prefer".

    Raises
    ------
    ValueError
        If the first token is not an integer after "$" and "," are removed.
    """
    if pd.isnull(income_str):
        return None
    # Split on any run of whitespace so leading spaces or tabs cannot yield an
    # empty first token (which would make int() raise).
    tokens = income_str.split()
    if not tokens or tokens[0] == "Prefer":
        return None
    return int(tokens[0].replace("$", "").replace(",", ""))
# Parse every household-income value with extract_income, store the result as
# "int_income", then summarize the new column.
income_question = "How much total combined money did all members of your HOUSEHOLD earn last year?"
data["int_income"] = data[income_question].apply(extract_income)
data["int_income"].describe()
Out[17]:
In [22]:
# Travel-distance answers for rows with int_income below 50000.
below_50k = data["int_income"] < 50000
data.loc[below_50k, "How far will you travel for Thanksgiving?"].value_counts()
Out[22]:
In [23]:
# Travel-distance answers for rows with int_income above 150000.
above_150k = data["int_income"] > 150000
data.loc[above_150k, "How far will you travel for Thanksgiving?"].value_counts()
Out[23]:
In [24]:
# Mean int_age for each combination of answers to the two questions below.
hometown_question = "Have you ever tried to meet up with hometown friends on Thanksgiving night?"
friendsgiving_question = 'Have you ever attended a "Friendsgiving?"'
data.pivot_table(
    values="int_age",
    index=hometown_question,
    columns=friendsgiving_question,
    aggfunc="mean",  # pivot_table's default, spelled out
)
Out[24]:
In [25]:
# Mean int_income for each combination of answers to the two questions below.
hometown_question = "Have you ever tried to meet up with hometown friends on Thanksgiving night?"
friendsgiving_question = 'Have you ever attended a "Friendsgiving?"'
data.pivot_table(
    values="int_income",
    index=hometown_question,
    columns=friendsgiving_question,
    aggfunc="mean",  # pivot_table's default, spelled out
)
Out[25]: