In [53]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [54]:
# Load the water-use table from the CSV next to the notebook and preview
# its first rows.
WATER_USE_CSV = "Water_Use_Average_By_Zipcode.csv"
data = pd.read_csv(WATER_USE_CSV)
data.head()
Out[54]:
What's going on in that last column? If those are zip codes and then latitude/longitude, we'll need to clean it up.
In [55]:
# Display the raw 'Location 1' column on its own to see what it contains.
data['Location 1']
Out[55]:
In [56]:
# Pull out the 'Location 1' entry labelled 0 so its exact raw form
# (including any embedded control characters) is visible in the output.
temp = data['Location 1'][0]
temp
Out[56]:
In [57]:
def cleanZipLocation(messy):
    """Split a raw 'Location 1' entry into its newline-separated pieces.

    Parameters
    ----------
    messy : str
        Raw cell text. The callers in this notebook treat the first piece
        as the zip code and the second as a parenthesised
        "(latitude, longitude)" pair.

    Returns
    -------
    list of str
        The pieces of ``messy`` separated at each newline character, in
        order. Text without a newline comes back as a one-element list.
    """
    return messy.split('\n')
# Show one raw entry (first row, last column) before cleaning.
# Written as a print() call so the cell also runs on Python 3.
print(data.iloc[0, -1])

# Each raw entry splits into [zip code, "(latitude, longitude)"].
vals = data['Location 1'].apply(cleanZipLocation)
zips = [x[0] for x in vals]
locs = [x[1] for x in vals]

# Strip the surrounding parentheses, split on the comma and convert to
# float so the coordinates are numeric; float() also discards the space
# that follows the comma in the longitude part.
coords = [loc.strip().strip('()').split(',') for loc in locs]
latitudes = [float(pair[0]) for pair in coords]
longitudes = [float(pair[1]) for pair in coords]

# Zip codes stay as strings: they are identifiers, not quantities.
data['Zip'] = zips
data['Longitude'] = longitudes
data['Latitude'] = latitudes

# Drop the raw column; Zip, Longitude and Latitude are now the last three
# columns. Re-running this cell after the deletion fails because
# 'Location 1' no longer exists.
del data['Location 1']
data.head()
Out[57]:
Much better.
In [74]:
# Draw one line per row of the table on a single set of axes.
# Every column except the last three (Zip, Longitude, Latitude) holds a
# value to plot.
# NOTE(review): x runs 6..13, presumably the closing years of the fiscal-year
# columns — confirm against the column headers.
years = range(6, 14)
usage = data.iloc[:, :-3]
for _, row in usage.iterrows():
    plt.plot(years, row.values, marker='x')
plt.xlabel("Years")
plt.ylabel("Average water use")
plt.show()
In [75]:
# Number of rows in the table, i.e. how many lines the plot above contains.
len(data)
Out[75]:
In [78]:
# Rows whose 'FY 09/10' value is above 55.
data.loc[data['FY 09/10'] > 55]
Out[78]:
Big water users shown above.
In [79]:
# Rows whose 'FY 12/13' value is above 55.
data.loc[data['FY 12/13'] > 55]
Out[79]:
This is the blue line identified in the plot, the one that has increased water use over the recent years.
In [83]:
# Plot a single row of the table on its own.
# NOTE(review): row position 59 is assumed to be the 90212 entry named in
# the title — confirm against the 'Zip' column if the data ever changes.
row = data.iloc[59]
# Explicit positional slice: everything except the last three columns
# (Zip, Longitude, Latitude).
plt.plot(range(6, 14), row.iloc[:-3])
plt.title("Use of water in 90212 zip area")
plt.xlabel("Years")
plt.ylabel("Average water use")
# Finish with show() so the cell renders the figure rather than echoing
# the repr of the last label object.
plt.show()
Out[83]:
I went on Google Maps, and apparently the 90212 zip code is in Beverly Hills.
Let's just go ahead and show them all individually.
In [85]:
# One panel per row of the table, all on the same 0-180 y-range so the
# panels can be compared directly.
n_cols = 10
# Keep at least the original 12 rows, but grow the grid when there are more
# than 120 rows; plt.subplot rejects an index beyond n_rows * n_cols.
n_rows = max(12, -(-len(data) // n_cols))  # -(-a // b) is ceil(a / b)

plt.figure(figsize=(50, 50))
for i in range(len(data)):
    row = data.iloc[i]
    plt.subplot(n_rows, n_cols, i + 1)
    # Everything except the last three columns (Zip, Longitude, Latitude).
    plt.plot(range(6, 14), row.iloc[:-3])
    plt.ylim([0, 180])
    plt.title("Zip: " + str(row['Zip']))
plt.show()
In [ ]: