Using the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, calculate the correlation between the recycling rate and the median income. Discuss your findings in your PR.
In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import seaborn as sns
In [2]:
# Data set borrowed from the algorithms class: it makes a nice scatter-plot
# example to experiment with (and my own scraping exercise is not ready yet).
# Note that this reads the *corrected* CSV rather than the original .xlsx file.
csv_path = "2013_NYC_CD_MedianIncome_Recycle_CORRECTED.csv"
df_corrected = pd.read_csv(csv_path)
In [3]:
# Scatter plot of recycling rate against median household income, with a
# fitted regression line, saved as RichRecycling.pdf.
fig, ax = plt.subplots(figsize=(10, 5), facecolor='WhiteSmoke')

# Font override handed to the label/title calls below. Background:
# http://stackoverflow.com/questions/21321670/how-to-change-fonts-in-matplotlib-python
csfont = {'fontname': 'DIN Condensed'}

# The exercise asks for the correlation itself, not just a picture of it.
# Series.corr defaults to the Pearson coefficient.
pearson_r = df_corrected['MdHHIncE'].corr(df_corrected['RecycleRate'])

# regplot's confidence band is bootstrapped; seeding NumPy keeps the figure
# identical from run to run.
np.random.seed(sum(map(ord, "correlation")))
# scatter=False: only the fitted line and its band come from seaborn. The
# points are drawn once, by the pandas call below, instead of twice.
sns.regplot(x='MdHHIncE', y='RecycleRate', data=df_corrected, scatter=False, ax=ax)
df_corrected.plot(kind='scatter', x='MdHHIncE', y='RecycleRate', ax=ax,
                  fontsize=10, s=55, c='Tomato')

# Background of the plotting area (the surrounding frame colour is the
# `facecolor` given to plt.subplots). set_axis_bgcolor no longer exists in
# matplotlib; set_facecolor is its replacement.
ax.set_facecolor("WhiteSmoke")

# Labels and title.
ax.set_ylabel('Recycle Rate', **csfont, fontsize=12)
ax.set_xlabel('Median Household Income', **csfont, fontsize=12)
ax.set_title("Why Well-Off Districts Should Be Recycling Even More", **csfont, fontsize=24)

# Remove the border around the plotting area.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

# Hide all major tick marks and the x tick labels. Booleans are required here:
# the strings 'off'/'on' are both truthy and would switch the ticks ON.
ax.tick_params(
    which='major',
    top=False,
    left=False,
    right=False,
    bottom=False,
    labeltop=False,
    labelbottom=False)
ax.set_xlim((15000, 135000))

# The previous annotation was anchored at data point (72, 0), far outside the
# x-range set above, so matplotlib clipped it and nothing appeared. Anchoring
# in axes-fraction coordinates keeps the text inside the plot whatever the
# data limits are. The text is passed positionally because the keyword was
# renamed from `s` to `text`.
ax.annotate('Pearson r = {:.2f}'.format(pearson_r), xy=(0.02, 0.95),
            xycoords='axes fraction', va='top', color='red')

# NOTE: transparent=True makes the figure and axes backgrounds transparent in
# the saved file, so the WhiteSmoke colours only show in the inline output.
fig.savefig('RichRecycling.pdf', transparent=True, bbox_inches='tight')
In [4]:
# Same scatter plot of recycling rate against median household income, this
# time without the regression line, saved as RichRecycling_without_line.pdf.
fig, ax = plt.subplots(figsize=(10, 5), facecolor='WhiteSmoke')

# Font override handed to the label/title calls below. Background:
# http://stackoverflow.com/questions/21321670/how-to-change-fonts-in-matplotlib-python
csfont = {'fontname': 'DIN Condensed'}

# The exercise asks for the correlation itself, not just a picture of it.
# Series.corr defaults to the Pearson coefficient.
pearson_r = df_corrected['MdHHIncE'].corr(df_corrected['RecycleRate'])

df_corrected.plot(kind='scatter', x='MdHHIncE', y='RecycleRate', ax=ax,
                  fontsize=10, s=55, c='Tomato')

# Background of the plotting area (the surrounding frame colour is the
# `facecolor` given to plt.subplots). set_axis_bgcolor no longer exists in
# matplotlib; set_facecolor is its replacement.
ax.set_facecolor("WhiteSmoke")

# Labels and title.
ax.set_ylabel('Recycle Rate', **csfont, fontsize=12)
ax.set_xlabel('Median Household Income', **csfont, fontsize=12)
ax.set_title("Why Well-Off Districts Should Be Recycling Even More", **csfont, fontsize=24)

# Remove the border around the plotting area.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

# Hide all major tick marks but keep the x tick labels. Booleans are required
# here: the strings 'off'/'on' are both truthy and would switch the ticks ON.
ax.tick_params(
    which='major',
    top=False,
    left=False,
    right=False,
    bottom=False,
    labeltop=False,
    labelbottom=True)
ax.set_xlim((15000, 135000))

# The previous annotation was anchored at data point (72, 0), far outside the
# x-range set above, so matplotlib clipped it and nothing appeared. Anchoring
# in axes-fraction coordinates keeps the text inside the plot whatever the
# data limits are. The text is passed positionally because the keyword was
# renamed from `s` to `text`.
ax.annotate('Pearson r = {:.2f}'.format(pearson_r), xy=(0.02, 0.95),
            xycoords='axes fraction', va='top', color='red')

# NOTE: transparent=True makes the figure and axes backgrounds transparent in
# the saved file, so the WhiteSmoke colours only show in the inline output.
fig.savefig('RichRecycling_without_line.pdf', transparent=True, bbox_inches='tight')
In [ ]:
In [ ]:
In [ ]: