In [2]:
import os
import charts_function_list
In [3]:
base,data,outputs = charts_function_list.folder_setup()
os.chdir(data)
In [29]:
inflow = pd.read_csv('countyinflow1516.csv')
outflow = pd.read_csv('countyoutflow1516.csv')
#rename some columns to make them more informative
inflow = inflow.rename(columns={'y2_statefips':'Destination State FIPS','y2_countyfips':'Destination County FIPS',
'y1_statefips':'Origin State FIPS','y1_countyfips':'Origin County FIPS',
'y1_state':'Origin State','y1_countyname':'Origin County',
'n1':'Number of Returns','n2':'Number of Exemptions','agi':'Adjusted Gross Income'})
outflow = outflow.rename(columns={ 'y1_statefips':'Origin State FIPS','y1_countyfips':'Origin County FIPS',
'y2_statefips':'Destination State FIPS','y2_countyfips':'Destination County FIPS',
'y2_state':'Destination State','y2_countyname':'Destination County',
'n1':'Number of Returns','n2':'Number of Exemptions','agi':'Adjusted Gross Income'})
la_county_in = inflow[(inflow['Destination County FIPS']==37)&(inflow['Destination State FIPS']==6)]
la_county_in = la_county[(~la_county['Origin State FIPS'].isin([96,97,98,57,59]))&(la_county['Origin County']!='Los Angeles County Non-migrants')]
la_county_out = outflow[(outflow['Origin County FIPS']==37)&(outflow['Origin State FIPS']==6)]
la_county_out = la_county_out[(~la_county_out['Destination State FIPS'].isin([96,97,98,57,59]))&(la_county_out['Destination County']!='Los Angeles County Non-migrants')]
la_county_out= la_county_out[['Destination State FIPS',
'Destination County FIPS', 'Destination State', 'Destination County',
'Number of Returns', 'Number of Exemptions', 'Adjusted Gross Income']].rename(columns={'Destination State FIPS':'State FIPS',
'Destination County FIPS':'County FIPS',
'Destination State':'State',
'Destination County':'County'})
la_county_out['Flow Direction'] = 'Out'
la_county_in = la_county_in[['Origin State FIPS','Origin County FIPS','Origin State','Origin County',
'Number of Returns', 'Number of Exemptions', 'Adjusted Gross Income']].rename(columns={'Origin State FIPS':'State FIPS',
'Origin County FIPS':'County FIPS',
'Origin State':'State',
'Origin County':'County'})
la_county_in['Flow Direction'] = 'In'
In [114]:
# summary counts
state_count_returns_in = la_county_in.groupby('State').sum()['Number of Returns']
state_count_exmemptions_in = la_county_in.groupby('State').sum()['Number of Exemptions']
state_count_returns_out = la_county_out.groupby('State').sum()['Number of Returns']
state_count_exemptions_out = la_county_out.groupby('State').sum()['Number of Exemptions']
In [ ]:
#for net migration from Census
CA = pd.read_excel('county-to-county-2012-2016-ins-outs-nets-gross.xlsx',sheet_name='California',)
#multi_columns = CA.loc[0:1].values
#create multi-index for columns
#column_1 = pd.MultiIndex.from_arrays(multi_columns)
#reset columns to multi-index
#CA_.columns = column_1
#drop MOE columns -- the columns with null values for the first level of the index
#CA_ = CA_[column_1[~column_1.get_level_values(0).isnull()]].copy()
CA_ = CA.loc[2:]
new_columns = [str(a)+':'+str(b) for a, b in zip(CA.loc[0].values,CA.loc[1].values)]
CA_.columns = new_columns
CA_ = CA_[CA_.columns[['nan:MOE' not in x for x in CA_.columns]]] #remove the MOE columns
CA_.columns = [x.replace(':nan','') for x in CA_.columns] #replace the "nan" fields
los_angeles_county = CA_[CA_['County Name of Geography A']=='Los Angeles County'].copy()
In [274]:
domestic = ['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
'Florida', 'Georgia', 'Hawaii', 'Idaho', 'Illinois', 'Indiana',
'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi',
'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire',
'New Jersey', 'New Mexico', 'New York', 'North Carolina',
'North Dakota', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania',
'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
'West Virginia', 'Wisconsin', 'Wyoming', 'Puerto Rico']
los_angeles_county['Geography B - Domestic or International'] = np.where(los_angeles_county['State/U.S. Island Area/Foreign Region of Geography B'].isin(domestic),'Domestic','International')
la_domestic = los_angeles_county[los_angeles_county['Geography B - Domestic or International']=='Domestic'].sort_values(by='Net Migration from Geography B to Geography A1:Estimate',ascending=False).copy()
In [275]:
la_county_in_ACS = la_domestic.groupby('State/U.S. Island Area/Foreign Region of Geography B').sum()['Flow from Geography B to Geography A:Estimate']
la_county_out_ACS = la_domestic.groupby('State/U.S. Island Area/Foreign Region of Geography B').sum()[ 'Counterflow from Geography A to Geography B1:Estimate']
In [277]:
state_comparison = pd.merge(pd.DataFrame(la_county_in_ACS),pd.DataFrame(la_county_out_ACS),left_index=True,right_index=True).rename(columns={'Flow from Geography B to Geography A:Estimate':'In-Flow',
'Counterflow from Geography A to Geography B1:Estimate':'Outflow'})
state_comparison['Net Migration'] = state_comparison['In-Flow']-state_comparison['Outflow']
state_comparison['Los Angeles County'] = 'Los Angeles County'
In [283]:
os.chdir(outputs)
state_comparison.to_csv('state_comparisons.csv')
la_domestic.to_csv('LA_domestic_flow.csv')
In [425]:
no_cal_state = state_comparison[state_comparison.index!='California'].sort_values(by='Net Migration').copy()
In [463]:
in_migration = charts_function_list.chart_maker(title = 'Los Angeles County Migration by State\n2012-2016 Average',title_size=22)
fig = in_migration.initial_fig_axis(figsize=(11,11))
fig.subplots_adjust(top=.9)
ax = in_migration.axes_set_up(fig=fig)
ax.barh([i for i,v in enumerate(no_cal_state.index)],no_cal_state['In-Flow'],zorder=2,color='#FA9592')
ax.barh([i for i,v in enumerate(no_cal_state.index)],no_cal_state['Outflow']*-1,zorder=2,color='#B2B81D')
ax.tick_params(axis='x',labelsize=16)
ax.tick_params(axis='y',labelsize=14)
yticks = ax.set_yticks([x for x in range(51)])
yticklabels = ax.set_yticklabels(no_cal_state.index,va='center',position=(-.02,0))
xticklabels = ax.set_xticklabels(["{:,}".format(int(x)) for x in ax.get_xticks()])
#ax.vlines(0,ymin=0,ymax=1,transform=ax.get_xaxis_transform(),linewidth=1,alpha=.7,zorder=3)
cutoff = float((no_cal_state.reset_index().loc[no_cal_state.reset_index()['Net Migration']>=0].head(1).index-.5).values)
ax.hlines(cut_off,xmin=-1,xmax=1,transform=ax.get_yaxis_transform(),linewidth=1,alpha=.7,zorder=5,linestyle='--')
for i,v in enumerate(no_cal_state['In-Flow'].values):
ax.text(v+30,i,str("{:,}".format(v)),va='center',style='italic')
for i,v in enumerate((no_cal_state['Outflow']*-1).values):
ax.text(v-30,i,str("{:,}".format(v)),va='center',style='italic',ha='right')
ax.text(.4,cutoff+2,'Net Gain',ha='right',transform=ax.get_yaxis_transform(),fontsize=16,alpha=.5,color='Blue')
ax.text(.4,cutoff-2,'Net Loss',ha='right',transform=ax.get_yaxis_transform(),fontsize=16,alpha=.5,color='Blue')
ax.set_ylim(-.5,50.4)
ax.grid(alpha=.4,zorder=1)
ax.text(.25,-.08,'OUT-MIGRATION',transform=ax.transAxes,ha='center',fontsize=14,color='red',alpha=.7)
ax.text(.75,-.08,'IN-MIGRATION',transform=ax.transAxes,ha='center',fontsize=14,color='red',alpha=.7)
sources = in_migration.citations(ax=ax,source_y=-.15,source='Source: American Community Survey, 2016',chart_tag='')
os.chdir(outputs)
charts_function_list.chart_save('migration_patterns')
In [403]:
no_cal_state['Outflow'].apply(lambda x: "{:,}".format(x))
Out[403]:
In [395]:
no_cal_state.sort_values(by='In-Flow',ascending=False).head(10)[['In-Flow','Net Migration']]
Out[395]:
In [361]:
for i,v in enumerate(no_cal_state[['Net Migration','In-Flow']].values):
print(v)
In [ ]: