In [1]:
%matplotlib inline
In [2]:
import numpy as np
import matplotlib.pyplot as plt
import csv
In [3]:
# Load the grants CSV into `data`: a dict mapping each header name to the
# list of raw string values found in that column (one entry per data row).
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# the file exists at exactly this location.
fn = "/Users/qiqi/Desktop/Administrative_Discretionary_Grants 1996-2014_.csv"

# newline="" hands newline handling to the csv module, which is what the csv
# documentation asks for; otherwise quoted fields that contain line breaks
# can be split incorrectly.
with open(fn, "r", newline="") as f:
    reader = csv.reader(f)
    # The first row holds the column names.
    header = next(reader)
    data = {column: [] for column in header}
    for row in reader:
        # zip stops at the shorter of header/row, so surplus cells in a row
        # are ignored and a short row leaves its trailing columns unappended.
        for column, value in zip(header, row):
            data[column].append(value)
In [4]:
class Dataset:
    """Column-oriented table stored as a dict of column name -> values.

    Filtering, splitting and statistics index the columns with NumPy boolean
    masks, so columns are expected to have been turned into NumPy arrays
    (see `convert`) before those methods are used.
    """

    def __init__(self, data):
        """Wrap `data` (dict: column name -> values).

        Only the dict is copied; the column containers themselves are shared
        with the caller.
        """
        self.data = data.copy()

    def convert(self, column, dtype):
        """Replace `column` with a NumPy array of the given `dtype`."""
        self.data[column] = np.array(self.data[column], dtype=dtype)

    def columns(self):
        """Return the column names (the keys view of the underlying dict)."""
        return self.data.keys()

    def _select(self, mask):
        """Return a new Dataset holding, for every column, the rows picked by `mask`."""
        return Dataset({name: values[mask] for name, values in self.data.items()})

    def filter_eq(self, column, value):
        """Return a new Dataset with the rows where `column` == `value`."""
        return self._select(self.data[column] == value)

    def filter_lt(self, column, value):
        """Return a new Dataset with the rows where `column` < `value`."""
        return self._select(self.data[column] < value)

    def filter_gt(self, column, value):
        """Return a new Dataset with the rows where `column` > `value`."""
        return self._select(self.data[column] > value)

    def filter_ne(self, column, value):
        """Return a new Dataset with the rows where `column` != `value`."""
        return self._select(self.data[column] != value)

    def size(self):
        """Return the number of entries in the first column, or 0 if there are no columns."""
        for values in self.data.values():
            return values.size
        return 0

    def split(self, column):
        """Partition by the distinct values of `column`.

        Returns a dict mapping each distinct value to the Dataset of rows
        carrying that value.
        """
        new_datasets = {}
        for split_value in np.unique(self.data[column]):
            new_datasets[split_value] = self.filter_eq(column, split_value)
        return new_datasets

    def stats(self):
        """Return {column: (min, max, std, mean)} for the numeric columns.

        A column counts as numeric when its dtype is any integer or floating
        type, whatever its width. Columns that are not NumPy arrays,
        non-numeric columns and empty columns are left out.
        """
        statistics = {}
        for key, values in self.data.items():
            dtype = getattr(values, "dtype", None)
            if dtype is None:
                # Not converted to an array yet: nothing to summarise.
                continue
            is_numeric = (np.issubdtype(dtype, np.integer)
                          or np.issubdtype(dtype, np.floating))
            if not is_numeric:
                continue
            if values.size == 0:
                # min()/max() raise on empty arrays.
                continue
            statistics[key] = (values.min(), values.max(), values.std(), values.mean())
        return statistics

    def compare(self, other):
        """Print this dataset's statistics next to `other`'s, column by column.

        Only columns that have statistics in both datasets are printed.
        """
        stats1 = self.stats()
        stats2 = other.stats()
        for column in self.columns():
            if column not in stats1 or column not in stats2:
                continue
            print("Column '{0:25s}'".format(column))
            for s1, s2 in zip(stats1[column], stats2[column]):
                print(" {0} vs {1}".format(s1, s2))

    def plot(self, x_column, y_column):
        """Plot `y_column` against `x_column` with point markers via pyplot."""
        plt.plot(self.data[x_column], self.data[y_column], '.')
In [5]:
# Wrap the raw CSV columns, then turn every column into a NumPy array:
# 'Fiscal Year' becomes integers, every other column stays text.
trees = Dataset(data)
value_types = {'Fiscal Year': 'int'}
for column_name in trees.columns():
    target_dtype = value_types.get(column_name, "str")
    trees.convert(column_name, target_dtype)
In [6]:
def _of_program_type(code):
    """Return the rows of `trees` whose "Program Type" column equals `code`."""
    return trees.filter_eq("Program Type", code)


# One subset per program-type code, numbered in alphabetical order of the code.
data1 = _of_program_type("IA")
data2 = _of_program_type("IC")
data3 = _of_program_type("IG")
data4 = _of_program_type("IL")
data5 = _of_program_type("IM")
data6 = _of_program_type("IS")
data7 = _of_program_type("LE")
data8 = _of_program_type("LG")
data9 = _of_program_type("LI")
data10 = _of_program_type("LL")
data11 = _of_program_type("LT")
data12 = _of_program_type("MA")
data13 = _of_program_type("MG")
data14 = _of_program_type("MH")
data15 = _of_program_type("ML")
data16 = _of_program_type("MN")
data17 = _of_program_type("MP")
data18 = _of_program_type("NC")
data19 = _of_program_type("ND")
data20 = _of_program_type("NE")
data21 = _of_program_type("NG")
data22 = _of_program_type("NL")
data23 = _of_program_type("NO")
data24 = _of_program_type("NP")
data25 = _of_program_type("NR")
data26 = _of_program_type("RE")
data27 = _of_program_type("SP")
data28 = _of_program_type("ST")
In [7]:
# Number of rows in each per-program-type subset, in data1 .. data28 order.
trunk = [
    len(subset.data["Program Type"])
    for subset in (
        data1, data2, data3, data4, data5, data6, data7,
        data8, data9, data10, data11, data12, data13, data14,
        data15, data16, data17, data18, data19, data20, data21,
        data22, data23, data24, data25, data26, data27, data28,
    )
]
In [8]:
trunk
Out[8]:
In [9]:
# Counts ordered from largest to smallest; `trunk` itself is left untouched.
# NOTE: after sorting, position i no longer corresponds to the i-th program
# type that `trunk` was built from.
trunk1 = list(trunk)
trunk1.sort(reverse=True)
In [10]:
trunk1
Out[10]:
In [11]:
# Program-type codes used as x-axis tick labels, grouped by first letter.
x_label = [
    "IA", "IC", "IG", "IL", "IM", "IS",
    "LE", "LG", "LI", "LL", "LT",
    "MA", "MG", "MH", "ML", "MN", "MP",
    "NC", "ND", "NE", "NG", "NL", "NO", "NP", "NR",
    "RE",
    "SP", "ST",
]
In [12]:
type(x_label)
Out[12]:
In [13]:
# 1-based x positions for the bars: 1 .. 28, one per program type.
# (The earlier empty-list initialisation was overwritten immediately, so it
# has been dropped.)
x_number = range(1, 29)
In [14]:
x_number
Out[14]:
In [15]:
def autolabel(rects):
    """Annotate every bar in `rects` with its height, truncated to an integer.

    The text is placed one sixth of the bar's width in from its left edge
    and 1% above its top, using pyplot's current axes.
    """
    for bar in rects:
        bar_height = bar.get_height()
        label_x = bar.get_x() + bar.get_width() / 6.
        label_y = 1.01 * bar_height
        plt.text(label_x, label_y, '%s' % int(bar_height), fontsize=14)
In [17]:
# Figure and tick defaults. rc settings only affect artists created after
# they are set, so they must come before the bar chart is drawn.
plt.rcParams["figure.figsize"] = (20, 20)
plt.rc('xtick', labelsize = 15)
plt.rc('ytick', labelsize = 15)

# Sort counts and labels together so every bar keeps the program-type code it
# belongs to. Sorting the counts alone and reusing the unsorted label list
# would attach the wrong code to most bars.
ranked = sorted(zip(trunk, x_label), key=lambda pair: pair[0], reverse=True)
ranked_counts = [pair[0] for pair in ranked]
ranked_labels = [pair[1] for pair in ranked]

rect = plt.bar(x_number, ranked_counts, align="center", width=0.8, alpha=0.5)
plt.xticks(x_number, ranked_labels)
plt.ylabel('number of grants',fontsize = 20)
plt.xlabel('Grants Program type',fontsize=20)
plt.title('Number of Different Grants Program Types',fontsize = 20)
# Write the count above each bar.
autolabel(rect)
plt.show()
In [ ]: