In [16]:
import pandas as pd
import matplotlib.pyplot as plt

In [17]:
# Group labels: the sequence 1..5 repeated four times (20 entries).
a = [1, 2, 3, 4, 5] * 4
# One value per label, matched to `a` by position; each row below is one cycle.
b = [
    2, 1, 3, 12, 9,
    1, 6, 6, 14, 10,
    5, 4, 2, 12, 10,
    17, 24, 6, 4, 10,
]

In [18]:
# Combine the two lists into a two-column frame, one row per position.
c = pd.DataFrame(
    data={
        'a': a,
        'b': b,
    }
)

In [19]:
# Display the full frame: 20 rows, one per (a, b) pair.
c


Out[19]:
a b
0 1 2
1 2 1
2 3 3
3 4 12
4 5 9
5 1 1
6 2 6
7 3 6
8 4 14
9 5 10
10 1 5
11 2 4
12 3 2
13 4 12
14 5 10
15 1 17
16 2 24
17 3 6
18 4 4
19 5 10

In [20]:
# Each value of 'a' occurs several times, so draw the rows as individual
# points. A connected line would add misleading segments that jump from
# a=5 back to a=1 between consecutive cycles.
fig, ax = plt.subplots()
ax.scatter(c['a'], c['b'])
ax.set(xlabel='a', ylabel='b', title='b versus a (one point per row)')
plt.show()



In [21]:
# Count the rows in each 'a' group. The 'a' column is taken from the groupby
# result's own index, so every label is guaranteed to sit next to its count
# regardless of how the raw list is ordered. The frame keeps the group label
# as its index and exposes the columns in the order ['a', 'count'].
d = (
    c.groupby('a')['b']
    .count()
    .rename('count')
    .to_frame()
    .assign(a=lambda counts: counts.index)
    [['a', 'count']]
)

In [22]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of d.
d.describe()


Out[22]:
a count
count 5.000000 5.0
mean 3.000000 4.0
std 1.581139 0.0
min 1.000000 4.0
25% 2.000000 4.0
50% 3.000000 4.0
75% 4.000000 4.0
max 5.000000 4.0

In [23]:
# (rows, columns) of the per-group frame.
d.shape


Out[23]:
(5, 2)

In [24]:
# Plain-text dump of d; the group label appears both as the index (named 'a')
# and as the regular column 'a'.
print(d)


   a  count
a          
1  1      4
2  2      4
3  3      4
4  4      4
5  5      4

In [25]:
# Number of rows per 'a' group. Axis labels and a title are set so the
# figure can be read on its own; markers show the discrete group values.
fig, ax = plt.subplots()
ax.plot(d['a'], d['count'], marker='o')
ax.set(xlabel='a', ylabel='count', title='Rows per value of a')
plt.show()



In [ ]: