In [1]:
%matplotlib inline

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import csv

In [3]:
fn="/Users/qiqi/Desktop/Administrative_Discretionary_Grants 1996-2014_.csv"
# Read the CSV into `data`: a dict mapping every header name to the list of
# raw string values found under it, in file order.
with open(fn, "r") as f:
    reader = csv.reader(f)
    header = next(reader)  # first row holds the column names
    data = dict((name, []) for name in header)
    for record in reader:
        # zip stops at the shorter of header/record, so extra cells are ignored.
        for name, cell in zip(header, record):
            data[name].append(cell)

In [4]:
class Dataset:
    """Column-oriented table: a dict mapping column names to arrays of values.

    The filter/split methods return new ``Dataset`` objects and never modify
    the one they are called on. Columns must be NumPy arrays (see ``convert``)
    for filtering, splitting, ``size`` and ``stats`` to work element-wise.
    """

    def __init__(self, data):
        """Wrap ``data`` (column name -> values).

        Only the dict is copied (shallow); the column objects are shared.
        """
        self.data = data.copy()

    def convert(self, column, dtype):
        """Replace ``column`` with a NumPy array of its values cast to ``dtype``."""
        self.data[column] = np.array(self.data[column], dtype=dtype)

    def columns(self):
        """Return the column names."""
        return self.data.keys()

    def _select(self, mask):
        """Return a new Dataset holding, for every column, ``values[mask]``."""
        return Dataset(dict((name, values[mask])
                            for name, values in self.data.items()))

    def filter_eq(self, column, value):
        """Return the rows where ``column == value``."""
        return self._select(self.data[column] == value)

    def filter_lt(self, column, value):
        """Return the rows where ``column < value``."""
        return self._select(self.data[column] < value)

    def filter_gt(self, column, value):
        """Return the rows where ``column > value``."""
        return self._select(self.data[column] > value)

    def filter_ne(self, column, value):
        """Return the rows where ``column != value``."""
        return self._select(self.data[column] != value)

    def size(self):
        """Return the number of entries in the first column, or 0 with no columns."""
        for values in self.data.values():
            return values.size
        return 0

    def split(self, column):
        """Return ``{value: Dataset}`` with one entry per unique value of ``column``."""
        new_datasets = {}
        for split_value in np.unique(self.data[column]):
            new_datasets[split_value] = self.filter_eq(column, split_value)
        return new_datasets

    def stats(self):
        """Return ``{column: (min, max, std, mean)}`` for the numeric columns.

        A column is numeric when its dtype kind is signed integer, unsigned
        integer or float, whatever its width. Columns that are not arrays,
        are non-numeric, or are empty are left out.
        """
        statistics = {}
        for key, values in self.data.items():
            dtype = getattr(values, "dtype", None)
            # Check the dtype *kind*: comparing against the strings "float"/"int"
            # only matches the platform-default widths and misses e.g. float32.
            if dtype is None or dtype.kind not in ("i", "u", "f"):
                continue
            if values.size == 0:
                # min()/max() raise on empty arrays; there is nothing to report.
                continue
            statistics[key] = (values.min(), values.max(), values.std(), values.mean())
        return statistics

    def compare(self, other):
        """Print min/max/std/mean side by side for columns numeric in both datasets."""
        stats1 = self.stats()
        stats2 = other.stats()
        for column in self.columns():
            # Skip columns either side has no statistics for, instead of
            # failing with a KeyError on the missing one.
            if column not in stats1 or column not in stats2:
                continue
            print("Column '{0:25s}'".format(column))
            for s1, s2 in zip(stats1[column], stats2[column]):
                print("    {0} vs {1}".format(s1, s2))

    def plot(self, x_column, y_column):
        """Scatter ``y_column`` against ``x_column`` as dots on the current axes."""
        plt.plot(self.data[x_column], self.data[y_column], '.')

In [5]:
# Wrap the raw columns, then cast each one to a NumPy array: 'Fiscal Year'
# becomes integers and every other column is kept as strings.
trees = Dataset(data)
value_types = {'Fiscal Year': 'int'}
for column_name in trees.columns():
    target_dtype = value_types.get(column_name, "str")
    trees.convert(column_name, target_dtype)

In [6]:
# Program-type codes, listed in the order the data1..data28 subsets are numbered.
program_type_codes = [
    "IA", "IC", "IG", "IL", "IM", "IS", "LE",
    "LG", "LI", "LL", "LT", "MA", "MG", "MH",
    "ML", "MN", "MP", "NC", "ND", "NE", "NG",
    "NL", "NO", "NP", "NR", "RE", "SP", "ST",
]
# One filtered Dataset per code, unpacked into the numbered names.
(data1, data2, data3, data4, data5, data6, data7,
 data8, data9, data10, data11, data12, data13, data14,
 data15, data16, data17, data18, data19, data20, data21,
 data22, data23, data24, data25, data26, data27, data28) = [
    trees.filter_eq("Program Type", code) for code in program_type_codes
]

In [7]:
# Number of rows in each program-type subset, in data1..data28 order.
trunk = [
    len(subset.data["Program Type"])
    for subset in (
        data1, data2, data3, data4, data5, data6, data7,
        data8, data9, data10, data11, data12, data13, data14,
        data15, data16, data17, data18, data19, data20, data21,
        data22, data23, data24, data25, data26, data27, data28,
    )
]

In [8]:
# Display the row count of each program-type subset, in the order they were collected.
trunk


Out[8]:
[1397,
 1121,
 1299,
 57,
 1731,
 102,
 51,
 856,
 846,
 97,
 196,
 1871,
 39,
 103,
 168,
 240,
 64,
 10,
 29,
 23,
 2713,
 29,
 13,
 13,
 17,
 421,
 17,
 71]

In [9]:
# Counts ordered from largest to smallest. Only the numbers are sorted here;
# the program-type code each count belongs to is not carried along.
trunk1 = list(trunk)
trunk1.sort(reverse=True)

In [10]:
# Display the counts sorted in descending order.
trunk1


Out[10]:
[2713,
 1871,
 1731,
 1397,
 1299,
 1121,
 856,
 846,
 421,
 240,
 196,
 168,
 103,
 102,
 97,
 71,
 64,
 57,
 51,
 39,
 29,
 29,
 23,
 17,
 17,
 13,
 13,
 10]

In [11]:
# Program-type codes used as x-axis tick labels (28 entries, alphabetical).
x_label = [
    "IA", "IC", "IG", "IL", "IM", "IS", "LE",
    "LG", "LI", "LL", "LT", "MA", "MG", "MH",
    "ML", "MN", "MP", "NC", "ND", "NE", "NG",
    "NL", "NO", "NP", "NR", "RE", "SP", "ST",
]

In [12]:
# Sanity check: show the Python type of x_label.
type(x_label)


Out[12]:
list

In [13]:
# 1-based x positions for the 28 bars. The former `x_number=[]` was a dead
# assignment, and list() keeps this a concrete list on Python 3 as well,
# where range() is lazy.
x_number = list(range(1, 29))

In [14]:
# Display the x positions that will be used for the bars.
x_number


Out[14]:
[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28]

In [15]:
def autolabel(rects):
    """Write each bar's height, truncated to an integer, just above the bar.

    rects: iterable of bar patches exposing get_x(), get_width() and
    get_height(), e.g. the container returned by plt.bar.
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Text starts one sixth of the bar width in from the bar's x position,
        # at 1% above the bar's height.
        text_x = bar.get_x() + bar.get_width() / 6.
        text_y = 1.01 * bar_height
        plt.text(text_x, text_y, '%s' % int(bar_height), fontsize=14)

In [17]:
# Bar chart of the number of grants per program type, largest first.
plt.rcParams["figure.figsize"] = (20, 20)
# Set tick label sizes before any ticks are created so they apply to this
# figure on the first run, not only on a re-run of the cell.
plt.rc('xtick', labelsize = 15)
plt.rc('ytick', labelsize = 15)

# Sort the counts together with their program-type codes. Plotting the sorted
# counts against the unsorted label list would put the wrong code under
# almost every bar.
order = sorted(range(len(trunk)), key=lambda i: trunk[i], reverse=True)
sorted_counts = [trunk[i] for i in order]
sorted_labels = [x_label[i] for i in order]
positions = list(range(1, len(sorted_counts) + 1))

rect=plt.bar(positions,sorted_counts,align="center",width=0.8,alpha=0.5)
plt.xticks(positions,sorted_labels)
plt.ylabel('number of grants',fontsize = 20)
plt.xlabel('Grants Program type',fontsize=20)
plt.title('Number of Different Grants Program Types',fontsize = 20)
autolabel(rect)
plt.show()



In [ ]: