This is for Task 3. (Descriptive section)
In [43]:
def _numeric_sentence(series, title, noun):
    """Build the summary sentence for one numeric column.

    Args:
        series: numeric pandas Series to summarise.
        title: capitalised column name used at the start of the sentence.
        noun: column name used inside the sentence body.

    Returns:
        The formatted sentence containing min, max, count, mean, standard
        deviation and the three quartiles of ``series``.
    """
    # Compute the statistics once and read them by label; integer positions
    # on a string-labelled Series are deprecated in recent pandas releases.
    stats = series.describe()
    return (
        "%s description: The %s data ranges from %f to %f, "
        "with %d numbers in all, the mean value is %f, "
        "the standard deviation is %f. The 1st quartile value is %f, "
        "the median value is %f, the 3rd quartile value is %f."
        % (
            title,
            noun,
            stats["min"],
            stats["max"],
            int(stats["count"]),
            stats["mean"],
            stats["std"],
            stats["25%"],
            stats["50%"],
            stats["75%"],
        )
    )


def describe(data):
    """Print and plot a descriptive summary of a CSV file.

    The file is read with ``pandas.read_csv`` and must contain the columns
    ``latitude``, ``longitude`` and ``categorical``.  The function prints one
    summary sentence for each numeric column and one for the categorical
    column, then shows a bar chart and a pie chart of the category counts
    (most frequent category first).

    Args:
        data: path (or any object accepted by ``pandas.read_csv``) of the CSV
            file to describe, e.g. ``'t3sample.csv'``.

    Returns:
        None. Output is printed text and two matplotlib figures.
    """
    from collections import Counter

    import pandas as pd

    frame = pd.read_csv(data)

    # Numeric columns.
    print(_numeric_sentence(frame["latitude"], "Latitude", "latitude"))
    print(_numeric_sentence(frame["longitude"], "Longitude", "longitude"))

    # Categorical column: count once; most_common() returns the
    # (value, count) pairs sorted by descending count.
    categories = frame["categorical"]
    ranked = Counter(categories).most_common()
    if not ranked:
        # Nothing to rank or plot when the column has no rows.
        print("There are 0 types of categorical data.")
        return

    top_item, top_count = ranked[0]
    print(
        "There are %d types of categorical data, among all, the most common "
        "item is %s, with the occurrence of %d."
        % (len(ranked), top_item, top_count)
    )

    names = [name for name, _ in ranked]
    counts = [count for _, count in ranked]

    # Plotting libraries are imported only here so the text report above is
    # still produced before any plotting dependency is touched.
    import matplotlib.pyplot as plt
    import numpy as np

    # Bar chart of the counts, sorted from most to least frequent.
    positions = np.arange(len(names))
    plt.bar(positions, counts, align='center', alpha=0.5)
    plt.xticks(positions, names)
    plt.ylabel('number of grants')
    plt.title('Program types vs their counts')
    plt.show()

    # Pie chart of each category's share of all rows, in percent.
    total_rows = len(categories)
    shares = [count / total_rows * 100 for count in counts]
    _, axis = plt.subplots()
    axis.pie(shares, labels=tuple(names), autopct='%1.1f%%',
             shadow=True, startangle=90)
    axis.axis('equal')  # Equal aspect ratio draws the pie as a circle.
    plt.show()
In [44]:
# Run the descriptive report on the sample file 't3sample.csv'.
describe('t3sample.csv')
In [ ]: