In [1]:
import numpy as np
import pandas as pd
import os,sys
from ast import literal_eval
Read the regional paygap data:
In [2]:
# Load the regional pay-gap table (path is relative to the notebook's working
# directory) and preview the first rows.
df=pd.read_csv('../paygap_region_data.csv')
df.head()
Out[2]:
Extract the column of pay gap values:
In [3]:
# Pull out the mean pay-gap column and its range.  Series.min()/max() skip
# missing values, whereas the builtin min()/max() return order-dependent
# results as soon as a NaN is present in the column.
paygap = df['MeanDiff']
zmin = paygap.min()
zmax = paygap.max()
Set up a function that converts the values to RGB colour codes:
In [4]:
def get_colour_for_val(val, plotly_scale, colours01, vmin, vmax):
    """Return the colour-scale colour for ``val`` as an ``'rgb(r, g, b)'`` string.

    ``val`` is normalised to [0, 1] using ``vmin``/``vmax``, located between two
    neighbouring stops of ``plotly_scale``, and the colours of those two stops
    are blended linearly.

    Parameters
    ----------
    val : number
        Value to colour.  Values outside ``[vmin, vmax]`` are clamped to the
        nearest end of the scale.
    plotly_scale : sequence of numbers
        Increasing stop positions; expected to run from 0 to 1.
    colours01 : array-like, one row of (r, g, b) per stop
        Colour components of each stop, each in the range 0-1.
    vmin, vmax : number
        Values mapped to the first and last stop respectively.

    Returns
    -------
    str
        Colour formatted as ``'rgb(r, g, b)'`` with integer components 0-255.

    Raises
    ------
    ValueError
        If ``vmin >= vmax``.
    """
    if vmin >= vmax:
        raise ValueError('vmin should be < vmax')

    # Work on plain floats / a float array regardless of what the caller passed.
    scale = np.asarray(plotly_scale, dtype=float).tolist()
    colours = np.asarray(colours01, dtype=float)

    v = (val - vmin) / float(vmax - vmin)
    # Clamp so out-of-range values take the end colours instead of running off
    # the end of the scale or extrapolating to negative components.
    v = min(max(v, 0.0), 1.0)

    # Find the segment [scale[idx], scale[idx + 1]] that contains v.
    idx = 0
    while idx < len(scale) - 2 and v > scale[idx + 1]:
        idx += 1
    left, right = scale[idx], scale[idx + 1]

    # Fractional position inside the segment, then linear blend of its two colours.
    frac = (v - left) / (right - left)
    blended = colours[idx] + frac * (colours[idx + 1] - colours[idx])

    # +0.5 before truncating rounds to the nearest integer; .tolist() yields
    # plain Python ints so the tuple prints as "(r, g, b)" on every NumPy version.
    channels = (255 * blended + 0.5).astype(np.uint8).tolist()
    return 'rgb' + str(tuple(channels))
Define the colour scale:
In [5]:
# Colour scale as [position, colour] pairs: positions increase from 0.0 to 1.0
# and each colour is an 'rgb(r, g, b)' string with components in 0-255.
# NOTE(review): presumably follows Plotly's colourscale format - confirm.
pl_colourscale= [[0.0, 'rgb(255, 255, 204)'],
[0.35, 'rgb(161, 218, 180)'],
[0.5, 'rgb(65, 182, 196)'],
[0.6, 'rgb(44, 127, 184)'],
[0.7, 'rgb(8, 104, 172)'],
[1.0, 'rgb(37, 52, 148)']]
Split the colour scale into its numeric stop positions and the corresponding RGB colours, with the colour components rescaled to the range 0–1:
In [6]:
# Split the colour scale into stop positions and colour strings.  List
# comprehensions are used rather than bare map(): on Python 3 map() returns a
# one-shot iterator, which cannot be indexed and which np.array() does not
# expand into an array of its items.
pl_scale = [float(entry[0]) for entry in pl_colourscale]
pl_colours = np.array([entry[1] for entry in pl_colourscale])
# colour[3:] strips the leading 'rgb', leaving '(r, g, b)' for literal_eval to
# parse into a tuple; dividing by 255 rescales the components to 0-1.
colours01 = np.array([literal_eval(colour[3:]) for colour in pl_colours]) / 255.0
Calculate the corresponding colour for each pay gap value:
In [8]:
# Map every pay-gap value to its interpolated 'rgb(r, g, b)' string, using the
# data's own range [zmin, zmax] as the limits of the colour scale.
facecolour=[get_colour_for_val(z, pl_scale, colours01, zmin, zmax) for z in paygap]
Put the colours into a column of the pandas dataframe:
In [ ]:
# Store the colour strings as a new 'Colours' column (this modifies df in
# place), then preview the result.
df['Colours'] = facecolour
df.head()
Write out the updated csv file:
In [ ]:
# Write the table, including the new colour column, to a CSV file in the
# current working directory.
outfile = 'paygap_region_data_with_colours.csv'
# Remove any previous output first.  The original called os.sys(...), but
# os.sys is the sys module and is not callable, so the cell raised TypeError
# whenever the file already existed; os.remove also avoids shelling out.
if os.path.exists(outfile):
    os.remove(outfile)
# NOTE(review): to_csv writes the index as an extra first column by default;
# pass index=False if that column is not wanted - confirm with consumers.
df.to_csv(outfile)
In [ ]:
In [ ]: