Using the 2013_NYC_CD_MedianIncome_Recycle.xlsx file, calculate the correlation between the recycling rate and the median income. Discuss your findings in your PR.
In [1]:
    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import seaborn as sns
    
In [2]:
    
# Load the corrected median-income / recycling CSV from the algorithms class:
# it makes a nice scatter-plot dataset to experiment with
# (and my own scraping exercise is not ready yet).
# The plotting cells below read the 'MdHHIncE' and 'RecycleRate' columns.
# NOTE(review): the assignment text names the .xlsx file while this reads a
# "_CORRECTED" CSV -- presumably a cleaned export of the same data; confirm.
df_corrected = pd.read_csv("2013_NYC_CD_MedianIncome_Recycle_CORRECTED.csv")
    
In [3]:
    
# Scatter plot of recycling rate vs. median household income with a linear
# regression fit drawn by seaborn; the figure is saved to 'RichRecycling.pdf'.
fig, ax = plt.subplots(figsize=(10, 5), facecolor='WhiteSmoke')

# Extra text keyword arguments selecting the font for the labels and title.
# Explanation: http://stackoverflow.com/questions/21321670/how-to-change-fonts-in-matplotlib-python
csfont = {'fontname': 'DIN Condensed'}

# Seed NumPy's global RNG before fitting -- presumably so that regplot's
# bootstrapped confidence band is reproducible between runs (older seaborn
# releases draw from the global RNG; verify for the installed version).
np.random.seed(sum(map(ord, "correlation")))

# Draw only the regression line and its confidence band here; the points
# themselves are drawn once, by the pandas scatter call below, instead of
# being plotted twice on top of each other.
sns.regplot(x=df_corrected['MdHHIncE'], y=df_corrected['RecycleRate'],
            scatter=False, ax=ax)
df_corrected.plot(kind='scatter', x='MdHHIncE', y='RecycleRate', ax=ax,
                  fontsize=10, s=55, color='Tomato')

# Background colour of the plotting area (the surrounding frame colour is the
# `facecolor` passed to plt.subplots). `set_axis_bgcolor` was removed from
# matplotlib; `set_facecolor` is its replacement.
ax.set_facecolor("WhiteSmoke")

# Labelling.
ax.set_ylabel('Recycle Rate', fontsize=12, **csfont)
ax.set_xlabel('Median Household Income', fontsize=12, **csfont)
ax.set_title("Why Well-Off Districts Should Be Recycling Even More",
             fontsize=24, **csfont)

# Getting rid of the borders.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

# Hide all major tick marks and the x tick labels. Booleans are required:
# the strings 'off'/'on' are truthy on current matplotlib and would switch
# the ticks ON rather than off.
ax.tick_params(
    which='major',
    top=False,
    left=False,
    right=False,
    bottom=False,
    labeltop=False,
    labelbottom=False)
ax.set_xlim((15000, 135000))

# The assignment asks for the correlation between income and recycling rate,
# so compute it (Series.corr defaults to Pearson) and show it on the chart.
# The previous annotation never appeared because it was anchored at data
# point (72, 0), far outside the x-limits above, and it used the `s=` keyword
# that matplotlib renamed to `text`. Axes-fraction coordinates keep the label
# inside the plot regardless of the data range.
pearson_r = df_corrected['MdHHIncE'].corr(df_corrected['RecycleRate'])
ax.annotate('Pearson r = {:.2f}'.format(pearson_r), xy=(0.02, 0.95),
            xycoords='axes fraction', va='top', color='red')

fig.savefig('RichRecycling.pdf', transparent=True, bbox_inches='tight')
    
    
In [4]:
    
# Scatter plot of recycling rate vs. median household income without the
# regression line; the figure is saved to 'RichRecycling_without_line.pdf'.
fig, ax = plt.subplots(figsize=(10, 5), facecolor='WhiteSmoke')

# Extra text keyword arguments selecting the font for the labels and title.
# Explanation: http://stackoverflow.com/questions/21321670/how-to-change-fonts-in-matplotlib-python
csfont = {'fontname': 'DIN Condensed'}

df_corrected.plot(kind='scatter', x='MdHHIncE', y='RecycleRate', ax=ax,
                  fontsize=10, s=55, color='Tomato')

# Background colour of the plotting area (the surrounding frame colour is the
# `facecolor` passed to plt.subplots). `set_axis_bgcolor` was removed from
# matplotlib; `set_facecolor` is its replacement.
ax.set_facecolor("WhiteSmoke")

# Labelling.
ax.set_ylabel('Recycle Rate', fontsize=12, **csfont)
ax.set_xlabel('Median Household Income', fontsize=12, **csfont)
ax.set_title("Why Well-Off Districts Should Be Recycling Even More",
             fontsize=24, **csfont)

# Getting rid of the borders.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

# Hide all major tick marks but keep the x tick labels. Booleans are
# required: the strings 'off'/'on' are both truthy on current matplotlib and
# would switch every tick ON.
ax.tick_params(
    which='major',
    top=False,
    left=False,
    right=False,
    bottom=False,
    labeltop=False,
    labelbottom=True)
ax.set_xlim((15000, 135000))

# The assignment asks for the correlation between income and recycling rate,
# so compute it (Series.corr defaults to Pearson) and show it on the chart.
# The previous annotation never appeared because it was anchored at data
# point (72, 0), far outside the x-limits above, and it used the `s=` keyword
# that matplotlib renamed to `text`. Axes-fraction coordinates keep the label
# inside the plot regardless of the data range.
pearson_r = df_corrected['MdHHIncE'].corr(df_corrected['RecycleRate'])
ax.annotate('Pearson r = {:.2f}'.format(pearson_r), xy=(0.02, 0.95),
            xycoords='axes fraction', va='top', color='red')

fig.savefig('RichRecycling_without_line.pdf', transparent=True, bbox_inches='tight')
    
    
In [ ]:
    
    
In [ ]:
    
    
In [ ]: