In [1]:
%matplotlib inline
In [2]:
import pandas as pd
In [3]:
# theoretical data
# Enumerate every ordered pair of faces from two six-sided dice and tally how
# many pairs produce each sum. `numbers_counts` maps sum -> number of pairs.
dice_values = [1, 2, 3, 4, 5, 6]
numbers_counts = {}
for dice1 in dice_values:
    for dice2 in dice_values:
        dice_sum = dice1 + dice2
        # dict.get supplies 0 the first time a sum is seen, so no separate
        # membership check is needed.
        numbers_counts[dice_sum] = numbers_counts.get(dice_sum, 0) + 1
In [4]:
# One row per (sum, count) pair taken from numbers_counts, in dict order.
t_df = pd.DataFrame(
    list(numbers_counts.items()),
    columns=["number", "count"],
)
t_df
Out[4]:
In [5]:
# Turn the counts into probabilities: each count divided by the sum of all counts.
t_df["prob"] = t_df["count"].div(t_df["count"].sum())
t_df
Out[5]:
In [6]:
# experimental data
# Every row read from data.csv is counted once under its "number" value.
e_df = pd.read_csv("data.csv")
e_df["count"] = 1.0  # one per row; summed per group below to give the tally
# as_index=False keeps "number" as an ordinary column. Grouping it into the
# index and then copying the index back into a "number" column would leave
# "number" as both an index level and a column label, which
# pd.merge(..., on="number") rejects as ambiguous.
e_df = e_df.groupby("number", as_index=False).sum()
e_df["number"] = e_df["number"].astype(float)
# Relative frequency of each observed value.
e_df["prob"] = e_df["count"] / e_df["count"].sum()
e_df
Out[6]:
In [7]:
# Outer-join the two tables on "number"; column names present in both tables
# get a suffix naming the table they came from.
df = t_df.merge(
    e_df,
    on="number",
    how="outer",
    suffixes=("_theoretical", "_experimental"),
)
df
Out[7]:
In [8]:
# Draw both probability columns against "number" on the same axes.
ax = df.plot(
    x="number",
    y=["prob_theoretical", "prob_experimental"],
    figsize=(8, 6),
)
ax.set_ylabel("probability")
Out[8]:
In [ ]:
In [ ]:
In [ ]: