In [1]:
import quandl;
import pandas as pd;

import pickle;

import matplotlib.pyplot as plt;
from matplotlib import style;
# Apply matplotlib's "fivethirtyeight" style sheet to the plots drawn below.
style.use("fivethirtyeight");

In [3]:
# Bridge height readings in meters. The sixth value (6212.42) is far out of
# line with the rest and looks like a bad reading.
bridge_height = {
    'meters': [10.26, 10.31, 10.27, 10.22, 10.23, 6212.42, 10.28, 10.25, 10.31],
}

# Single-column frame of the readings; plotting it makes the spike obvious.
df = pd.DataFrame(data=bridge_height)
df.plot()
plt.show()



In [6]:
# Rolling sample standard deviation over each pair of consecutive readings
# (window of 2). Row 0 has no predecessor, so its STD is NaN.
df["STD"] = df["meters"].rolling(2).std()

# Plot both columns: the raw heights and their rolling standard deviation.
df.plot()
plt.show()



In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column.
df_std = df.describe()
print(df_std)

# Reduce the summary to one number: the standard deviation of the whole
# "meters" column. Later cells compare the rolling STD against this value.
df_std = df_std.loc["std", "meters"]
print(df_std)


            meters          STD
count     9.000000     8.000000
mean    699.394444  1096.419446
std    2067.384584  2030.121949
min      10.220000     0.007071
25%      10.250000     0.026517
50%      10.270000     0.035355
75%      10.310000  1096.425633
max    6212.420000  4385.610607
2067.38458357

In [22]:
# Boolean mask: True where the rolling STD is below the overall standard
# deviation of the heights. A NaN rolling STD (row 0) compares as False.
print(df["STD"].lt(df_std))


0    False
1     True
2     True
3     True
4     True
5    False
6    False
7     True
8     True
Name: STD, dtype: bool

In [24]:
# Keep only the rows whose rolling STD is below the overall standard deviation.
# NOTE(review): with a window of 2 the spike at row 5 also inflates the STD of
# row 6, and row 0 has a NaN STD, so those two valid readings are dropped
# together with the spike itself.
junk_free_df = df.loc[df["STD"] < df_std]
print(junk_free_df)


   meters       STD
1   10.31  0.035355
2   10.27  0.028284
3   10.22  0.035355
4   10.23  0.007071
7   10.25  0.021213
8   10.31  0.042426

In [25]:
# Plot the filtered frame (heights and rolling STD) now that the spike is gone.
junk_free_df.plot()
plt.show()



In [ ]: