In [1]:
# 加载库
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from collections import OrderedDict
from matplotlib.pylab import frange
In [2]:
# Load the data, substituting 0 for missing entries in the second column.
def fill_data(raw):
    """Convert a raw text field to int; a blank (whitespace-only) field becomes 0."""
    stripped = raw.strip()
    return int(stripped or 0)


data = np.genfromtxt(
    'president.txt',
    dtype=(int, int),
    converters={1: fill_data},  # column index 1 is the one run through fill_data
    delimiter=',',
)
# First column goes to x, second column to y.
x, y = data[:, 0], data[:, 1]
In [3]:
# Group the data by frequency (number of occurrences of each distinct value).
# Counter() returns a dict-like object whose keys are the distinct values of y
# and whose values are how many times each one appears.
x_freq = Counter(y)
# Materialize the dict views as lists before handing them to NumPy: on
# Python 3, np.array() applied directly to a keys()/values() view produces a
# 0-d object array instead of a 1-D array. On Python 2 the result is unchanged.
x_ = np.array(list(x_freq.keys()))
y_ = np.array(list(x_freq.values()))
In [7]:
# Show the frequency table (value -> occurrence count).
# Function-call form prints the same on Python 2 and is valid on Python 3.
print(x_freq)
In [4]:
# Group the data into consecutive runs of `group` entries.
# Each key of x_group is the tuple of x values in one run; the value is the
# list of the corresponding y values.
x_group = OrderedDict()
group = 5  # number of consecutive entries per group
# Step through the data in strides of `group`. Slicing yields a uniform group
# size (the previous counter-based loop restarted its counter at 2 after the
# first flush, so later groups held one entry fewer) and naturally produces a
# shorter final group without ever adding an empty one.
for start in range(0, len(x), group):
    stop = start + group
    x_group[tuple(x[start:stop])] = list(y[start:stop])
print(x_group)
In [6]:
# Draw the dot plot of the frequency-grouped data.
# Work on the Axes returned by subplot() rather than the implicit current axes.
ax = plt.subplot(311)
ax.set_title("Dot Plot by Frequency")
# Counts go on the horizontal axis, the distinct values on the vertical axis.
ax.plot(y_, x_, 'ro')
ax.set_xlabel('Count')
ax.set_ylabel('# Frequential Request')
# Pad the horizontal range by one unit on each side so edge dots are not clipped.
lowest_count = min(y_)
highest_count = max(y_)
ax.set_xlim(lowest_count - 1, highest_count + 1)
plt.show()
In [ ]: