In [1]:
!wget https://raw.githubusercontent.com/dwillis/smpa3193-exercises/master/arrest.csv
In [30]:
import agate
import agatestats
from Levenshtein import distance
#agatestats.patch()
# Load the downloaded arrest.csv into an agate Table; every later cell works
# from (and in some cases rebinds) this `results` table.
results = agate.Table.from_csv("arrest.csv")
In [5]:
# Print the table's string form as a quick sanity check of what was loaded.
# NOTE(review): agate is expected to render this as the column names and
# inferred types rather than the data rows — confirm with the installed version.
print(results)
In [6]:
# Preview the first 5 rows. print_table() writes the formatted table to stdout
# itself and returns None, so it must not be wrapped in print() — doing so
# appends a stray "None" line after the table.
results.print_table(max_rows=5)
In [7]:
# Data-quality check: isolate the arrests whose 'Charge Descrip' value is
# missing and display them.
smelly_data = results.where(
    lambda row: row['Charge Descrip'] is None
)
smelly_data.print_table()
In [8]:
def strip_name_formula(column_name):
    """Build a Formula that trims surrounding whitespace from a name column.

    Missing or empty values become None instead of being stripped.

    The Formula is created with only (data_type, func): its third positional
    parameter is the boolean ``cast`` flag, not a source column, so no column
    object is passed here and the default cast behaviour applies.
    """
    def strip_value(row):
        value = row[column_name]
        return value.strip() if value else None

    return agate.Formula(agate.Text(), strip_value)


# Replace each name column in place (replace=True) with its trimmed version.
results = results.compute(
    [(name, strip_name_formula(name)) for name in ('LName', 'MName', 'FName')],
    replace=True,
)
In [9]:
# Count arrests per Age value, then sort so the most frequent ages come first
# (descending by the pivot's 'Count' column).
age_counts = results.pivot('Age')
age_distribution = age_counts.order_by('Count', reverse=True)
age_distribution.print_table()
In [10]:
# Render the age pivot as a text bar chart, passing 'Age' as the label column
# and 'Count' as the value column.
age_distribution.print_bars('Age', 'Count')
In [27]:
# Filter on 'Age' using a 3-standard-deviation outlier test and print the result.
# stdev_outliers is not a core agate method; presumably it is attached to Table
# by the agatestats import — verify.
# NOTE(review): reject=False is presumed to keep the outlier rows (rather than
# drop them) — confirm against the agate-stats documentation.
results.stdev_outliers('Age', deviations=3, reject=False).print_table()
In [86]:
# Group arrests by their 'Charge' value and report the mean 'Age' per group.
charges = results.group_by('Charge')
mean_age_aggregation = [
    ('Mean Age', agate.Mean('Age')),
]
charges.aggregate(mean_age_aggregation).print_table()
In [61]:
def extract_city(row):
    """Return the city portion of a row's 'Address', or None if unavailable.

    The city is taken to be the second-to-last comma-separated field of the
    address, with surrounding whitespace removed. Rows with a missing/empty
    address, or an address containing no comma, yield None instead of raising
    AttributeError / IndexError and aborting the whole computation.
    """
    address = row['Address']
    if not address:
        return None

    fields = address.split(',')
    if len(fields) < 2:
        # No comma at all: there is no second-to-last field to read.
        return None

    return fields[-2].strip()


# Add (or, on re-run, replace) the derived 'city' column.
results = results.compute([
    ('city', agate.Formula(agate.Text(), extract_city))
], replace=True)
In [81]:
# Count arrests whose city starts with 'ALEX'; rows with no city never match.
alexandria_begin = results.where(
    lambda row: bool(row['city']) and row['city'].startswith('ALEX')
)
len(alexandria_begin)
Out[81]:
In [88]:
# Fuzzy-match the city against the full name 'ALEXANDRIA', allowing up to 3
# single-character edits (Levenshtein distance) to catch misspellings.
# Comparing against the bare prefix 'ALEX' cannot work: 'ALEXANDRIA' itself is
# 6 edits away from 'ALEX', so the correct spelling would be excluded while
# unrelated short city names would slip in. Rows with no city never match.
search_alexandria = results.where(
    lambda row: distance(row['city'], 'ALEXANDRIA') <= 3 if row['city'] else False
)
len(search_alexandria)
Out[88]: