In [2]:
import pandas as pd
In [3]:
# Load the civil list, taking the column labels from the file's own header row.
# A `names=` override is deliberately not passed: the cells below look up
# columns such as 'SAL-RATE' and 'PC', which a three-label override would not
# provide, and `names` must be an ordered sequence (a set has no defined order
# and pandas rejects it).
df = pd.read_csv("Civil_List_2014.csv")
In [6]:
# Preview the first rows of the loaded table.
df.head()
Out[6]:
In [7]:
# Summary statistics for the 'SAL-RATE' column as it was loaded.
df['SAL-RATE'].describe()
Out[7]:
In [8]:
# Show the column labels exactly as they were read in.
df.columns
Out[8]:
In [10]:
"DPT ".strip()
Out[10]:
In [11]:
# Print each column label with its surrounding whitespace removed, one per line.
for label in df.columns:
    print(label.strip())
In [12]:
# The square brackets make this a list comprehension: it evaluates to a list
# holding one whitespace-stripped label per column.
[label.strip() for label in df.columns]
Out[12]:
In [13]:
# Overwrite the column labels with whitespace-stripped copies
# (a label like "DPT " becomes "DPT").
df.columns = [label.strip() for label in df.columns]
In [14]:
# Show the column labels again to check the whitespace cleanup.
df.columns
Out[14]:
In [15]:
# Count how many rows carry each distinct value of the 'PC' column.
df['PC'].value_counts()
Out[15]:
In [16]:
# Summary statistics for 'SAL-RATE', repeated after the column labels were stripped.
df['SAL-RATE'].describe()
Out[16]:
In [18]:
def money_to_float(money_str):
    """Convert a currency string such as "$7,888.00" to a float.

    Dollar signs and thousands separators are removed before parsing.
    Missing values map to NaN instead of raising, so the function can be
    applied to a column read from a CSV that contains empty cells.

    Args:
        money_str: A currency string, an already-numeric value, or a missing
            value (None, NaN, or a blank string).

    Returns:
        The amount as a float, or NaN when the input is missing.

    Raises:
        ValueError: If a non-blank string is not a number once "$" and ","
            have been removed.
    """
    if money_str is None:
        return float("nan")
    if not isinstance(money_str, str):
        # pandas stores empty cells as float NaN, which has no str.replace;
        # numeric inputs (NaN included) go straight through float().
        return float(money_str)
    cleaned = money_str.replace("$", "").replace(",", "").strip()
    if not cleaned:
        # Blank cell: treat it as missing rather than failing in float("").
        return float("nan")
    return float(cleaned)


print(money_to_float("$7888.00"))
In [19]:
# money_to_float converts one value at a time, so it is applied element-wise
# with Series.apply rather than being called on the whole column.
df['SAL-RATE'].apply(money_to_float)
Out[19]:
In [20]:
# Store the converted values in a new 'salary' column; 'SAL-RATE' itself is left as-is.
df['salary']= df['SAL-RATE'].apply(money_to_float)
In [21]:
# Preview the table again, now including the 'salary' column.
df.head()
Out[21]:
In [23]:
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
In [26]:
# Histogram of the 'salary' column; bins=50 draws it with 50 bars.
df['salary'].hist(bins=50)
Out[26]:
In [27]:
# Count how many rows carry each distinct value of the 'ADDRESS' column.
df['ADDRESS'].value_counts()
Out[27]:
In [29]:
# Read the agencies table with every column forced to the 'str' dtype.
agencies_df = pd.read_csv(
    "cleaned-agencies.csv",
    dtype='str',
)
In [30]:
# Preview the first rows of the agencies table.
agencies_df.head()
Out[30]:
In [31]:
# Per-column dtypes: only 'code' is pinned to text.
# NOTE(review): assumes the CSV has a 'code' column — confirm against its header.
col_types = {'code': 'str'}

# Pass the per-column mapping rather than dtype='str' for the whole file:
# forcing every column to text would prevent the Y/N flags from ever being
# converted. true_values / false_values are documented as lists of strings.
agencies_df = pd.read_csv(
    "cleaned-agencies.csv",
    dtype=col_types,
    true_values=['Y'],
    false_values=['N'],
)
agencies_df.head()
Out[31]:
In [ ]:
# Reload the agencies table treating "-999" as missing and Y/N as booleans.
# true_values / false_values are passed as lists, the documented form; a bare
# string only works because iterating a one-character string yields that
# character.
agencies_df = pd.read_csv(
    "cleaned-agencies.csv",
    na_values="-999",
    true_values=['Y'],
    false_values=['N'],
)
In [ ]:
# Join the civil list with the agencies table. No `on=` is given, so pandas
# joins on the column labels the two frames have in common.
# NOTE(review): confirm the frames share a key column — with none in common
# the merge raises an error. The result is only displayed, not stored.
pd.merge(df, agencies_df)