In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import calendar

In [2]:
test_date = dt.datetime(2017,8,25)

In [3]:
df = pd.read_csv("transactions.csv")
df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index(df['Date']).drop('Date',axis=1)

In [4]:
x = pd.to_datetime('2017-08-25')

In [5]:
def process_raw(tab):
    df = tab
    df['Date'] = pd.to_datetime(df['Date'])
    df = df.set_index(df['Date']).drop('Date',axis=1)
    return df

In [6]:
process_raw(pd.read_csv("transactions.csv"))


Out[6]:
Description Original Description Amount Transaction Type Category Account Name Labels Notes
Date
2017-09-09 Gig Car Share GIG CAR SHARE 0.50 debit Transportation & Taxi Discover NaN NaN
2017-09-09 Gig Car Share GIG CAR SHARE 0.50 debit Transportation & Taxi Discover NaN NaN
2017-09-10 Lyft LYFT *RIDE SUN 2PM 855-280-0278 CA 15.38 debit Transportation & Taxi Discover NaN NaN
2017-09-10 Lyft LYFT *RIDE SUN 4PM 855-280-0278 CA 10.88 debit Transportation & Taxi Discover NaN NaN
2017-09-09 Cafe Durant CAFE DURANT BERKELEY CA 9.66 debit Restaurants Discover NaN NaN
2017-09-08 Diyar Mediterranean Cusi DIYAR MEDITERRANEAN CUSI BERKELEY CA 8.73 debit Restaurants Discover NaN NaN
2017-08-31 Raleighs Pub RALEIGHS PUB BERKELEY CA 10.87 debit Restaurants Discover NaN NaN
2017-08-31 Internet Thank You INTERNET PAYMENT - THANK YOU 225.61 credit Credit Card Payment Discover NaN NaN
2017-08-29 Internet Thank You INTERNET PAYMENT - THANK YOU 155.53 credit Credit Card Payment Discover NaN NaN
2017-08-29 Ucb Recreational Sports UCB RECREATIONAL SPORTS 510-642-7796 CA 25.00 debit Entertainment Discover NaN NaN
2017-08-27 Berkeley Bowl Markesbe BERKELEY BOWL MARKESBE BERKELEY CA 200.61 debit Food Discover NaN NaN
2017-08-26 Chengdu Style Restaurant CHENGDU STYLE RESTAURANT BERKELEY CA 63.00 debit Restaurants Discover NaN NaN
2017-08-25 V A Cafe SQ *V&A CAFE BERKELEY CA0002305843011213890465 3.00 debit Restaurants Discover NaN NaN
2017-08-25 Lyft LYFT *RIDE FRI 12PM 855-280-0278 CA 11.69 debit Transportation & Taxi Discover NaN NaN
2017-08-25 Lyft LYFT *RIDE FRI 12PM 855-280-0278 CA 35.11 debit Transportation & Taxi Discover NaN NaN
2017-08-25 L Epi D SQ *L'EPI D'OR BAKERY CUPERTINO CA000115292150... 4.99 debit Restaurants Discover NaN NaN
2017-08-25 Lyft LYFT *RIDE FRI 10AM 855-280-0278 CA 30.85 debit Transportation & Taxi Discover NaN NaN
2017-08-24 Transfer from BofA Core Checking INTERNET PAYMENT - THANK YOU 383.82 credit Credit Card Payment Discover NaN NaN
2017-08-23 Laval's Pizza LAVAL'S PIZZA BERKELEY CA 6.89 debit Restaurants Discover NaN NaN
2017-08-21 Chick-Fil-A CHICK-FIL-A #02765 WALNUT CREEK CA 8.39 debit Restaurants Discover NaN NaN
2017-08-21 Tea 1 SQ *TEA 1 BERKELEY BERKELEY CA0002305843011209... 4.25 debit Restaurants Discover NaN NaN
2017-08-21 Amazon AMAZON.COM AMZN.COM/BILLWA6M1Q94EWTEI 15.32 debit Shopping Discover NaN NaN
2017-08-20 Amazon AMAZON.COM AMZN.COM/BILLWA49VLZN1FMGG 15.42 debit Shopping Discover NaN NaN
2017-08-20 Lyft LYFT *RIDE SUN 7PM 855-280-0278 CA 6.50 debit Transportation & Taxi Discover NaN NaN
2017-08-19 Comcast COMCAST CALIFORN CS 1X 800-266-2278 CA73603189... 39.99 debit Entertainment Discover NaN NaN
2017-08-18 Lyft LYFT *RIDE FRI 8PM 855-280-0278 CA 7.09 debit Transportation & Taxi Discover NaN NaN
2017-08-17 Laval's Pizza LAVAL'S PIZZA BERKELEY CA 8.75 debit Restaurants Discover NaN NaN
2017-08-13 Diyar Mediterranean Cusi DIYAR MEDITERRANEAN CUSI BERKELEY CA 18.55 debit Restaurants Discover NaN NaN
2017-08-12 Lyft LYFT *RIDE SAT 3PM 855-280-0278 CA 6.50 debit Transportation & Taxi Discover NaN NaN
2017-08-12 Safeway SAFEWAY STORE 0691 BERKELEY CA 93.20 debit Food Discover NaN NaN
... ... ... ... ... ... ... ... ...
2016-11-10 Ucafe Berkeley SQ *UCAFE, BERKELEY BERKELEY CA000115292150743... 2.75 debit Restaurants Discover NaN NaN
2016-11-10 La Val's Pizza LAVAL'S PIZZA BERKELEY CA 9.30 debit Restaurants Discover NaN NaN
2016-11-09 Dumpling Express DUMPLING EXPRESS BERKELEY CA 8.54 debit Restaurants Discover NaN NaN
2016-11-06 Ation 723 Tea Street 723 TEA STATION BERKELEY CA 8.27 debit Restaurantss Discover NaN NaN
2016-11-04 Amazon AMAZON PICKUP POINTS 877-345-0975 WAVHOYB211YRP 25.61 debit Shopping Discover NaN NaN
2016-11-03 Artichokes SQ *ARTICHOKE BASILLE' BERKELEY CA000115292150... 5.20 debit Restaurants Discover NaN NaN
2016-11-02 Cafe Strada Berkeley CAFE STRADA, INC BERKELEY CA 2.95 debit Restaurantss Discover NaN NaN
2016-11-02 Amazon AMAZON.COM AMZN.COM/BILLWAS92AN8Q6MPU 12.41 debit Shopping Discover NaN NaN
2016-10-30 Dumpling Express DUMPLING EXPRESS BERKELEY CA 8.54 debit Restaurants Discover NaN NaN
2016-10-30 Amazon AMAZON MKTPLACE PMTS AMZN.COM/BILLWAQXI26QETC0G 10.94 debit Shopping Discover NaN NaN
2016-10-30 Dumpling Express DUMPLING EXPRESS BERKELEY CA 8.54 debit Restaurants Discover NaN NaN
2016-10-29 Amazon AMAZON MKTPLACE PMTS AMZN.COM/BILLWAQXIRBS5R0F1 25.97 debit Shopping Discover NaN NaN
2016-10-26 Sharetea SQ *SHARETEA BERKELEY CA0001152921507419964410 4.10 debit Restaurants Discover NaN NaN
2016-10-24 Amazon AMAZON PICKUP POINTS 877-345-0975 WAEIYIHN10TWM 17.37 debit Shopping Discover NaN NaN
2016-10-23 Internet Thank You INTERNET PAYMENT - THANK YOU 136.10 credit Credit Card Payment Discover NaN NaN
2016-10-23 Ucb Housing Dining UCB HOUSING&DINING SVCS 510-643-6955 CA 20.00 debit Transfer for Cash Spending Discover NaN NaN
2016-10-16 Amazon AMAZON PICKUP POINTS 877-345-0975 WAG4Q9R3IKRN9 14.22 debit Shopping Discover NaN NaN
2016-10-15 Dumpling Express DUMPLING EXPRESS BERKELEY CA 8.04 debit Restaurants Discover NaN NaN
2016-10-15 Amazon AMAZON MKTPLACE PMTS AMZN.COM/BILLWAKYG48B5SZM0 16.95 debit Shopping Discover NaN NaN
2016-10-15 Amazon AMAZON PICKUP POINTS 877-345-0975 WAI8CZU4V6UT0 11.60 debit Shopping Discover NaN NaN
2016-10-14 Cafe Strada Berkeley CAFE STRADA, INC BERKELEY CA 1.95 debit Restaurantss Discover NaN NaN
2016-10-07 Dumpling Express DUMPLING EXPRESS BERKELEY CA 8.54 debit Restaurants Discover NaN NaN
2016-10-01 Sweetheart Cafe SQ *SWEETHEART CAFE BERKELEY CA000115292150739... 3.11 debit Restaurantss Discover NaN NaN
2016-09-30 Amazon AMAZON PICKUP POINTS 877-345-0975 WAQNFQ0I2N5VZ 12.46 debit Shopping Discover NaN NaN
2016-09-26 Amazon AMAZON MKTPLACE PMTS AMZN.COM/BILLWAPSZW18FL65W 12.95 debit Shopping Discover NaN NaN
2016-09-26 Simply Bowl SIMPLY BOWL BERKELEY CA 11.50 debit Restaurants Discover NaN NaN
2016-09-25 Amazon AMAZON MKTPLACE PMTS AMZN.COM/BILLWAILEP1GHK5UI 4.95 debit Shopping Discover NaN NaN
2016-09-25 Ucb Housing Dining UCB HOUSING&DINING SVCS 510-643-6955 CA 20.00 debit Transfer for Cash Spending Discover NaN NaN
2016-09-24 Amazon AMAZON PICKUP POINTS 877-345-0975 WATZP495DA69Q 9.83 debit Shopping Discover NaN NaN
2016-09-24 Internet Thank You INTERNET PAYMENT - THANK YOU 234.66 credit Credit Card Payment Discover NaN NaN

288 rows × 8 columns


In [7]:
to = dt.datetime.today()

In [8]:
categories = set(df['Category'])
print(categories)


{'Entertainment', 'Restaurants', 'Transportation & Taxi', 'Restaurantss', 'Apparel and Clothing', 'Transfer for Cash Spending', 'Health Care', 'Clothing', 'Mortgage & Rent', 'Advertising', 'Sporting Goods', 'Transportation', 'Pharmacy', 'Tuition', 'Food', 'Credit Card Payment', 'Gift', 'Shopping'}

In [9]:
getRangeExpenditure(df, to,to)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-c0f4d9560f9c> in <module>()
----> 1 getRangeExpenditure(df, to,to)

NameError: name 'getRangeExpenditure' is not defined

In [ ]:
def getTodayExpenditure(tab, today=dt.datetime.today()):
    '''gets amount of money spent toady'''
    if (today not in tab.index):
        return 0
    today_transactions = tab.loc[today.date()]
    return sum(today_transactions['Amount'])

In [ ]:
test = getTodayExpenditure(df,test_date)
print("$",round(test,2))

In [ ]:
to

In [ ]:
to - dt.timedelta(days=7)

In [ ]:
a = [i for i in pd.date_range(to, to).values if i in df.index.values]

In [ ]:
getFrequencies(df, test_date,test_date)

In [ ]:
date_range = [i for i in pd.date_range(test_date, test_date).values if i in df.index.values]

In [ ]:
transaction_range

In [ ]:
def getRangeExpenditure(tab, date_start, date_end, category=None):
    '''returns the amount of money spent in date range'''
    date_range = [i for i in pd.date_range(date_start, date_end).values if i in tab.index.values]
    if (len(date_range)==0):
        return 0
    today_transactions = tab.loc[date_range]
    today_transactions.head()
    if (category==None):
        return sum(today_transactions['Amount'])
    else:
        return sum(today_transactions[today_transactions['Category']==category]['Amount'])

In [ ]:
getRangeExpenditure(df, (dt.datetime.today()-dt.timedelta(days=60)).date(),dt.datetime.today().date(), 'Restaurants')

In [ ]:
def getFrequencies(tab, start_day, end_day, freq='daily'):
    date_range = [i for i in pd.date_range(start_day, end_day).values if i in tab.index.values]
    out = create_categories_map(tab)
    if (len(date_range)==0):
        return out
    transaction_range = tab.loc[date_range]
    out_val = transaction_range.groupby('Category')['Amount'].sum().to_dict()
    diff_days = (end_day - start_day).days
    if (diff_days==0):
        diff_days = 1
        
    if (freq == 'weekly'):
        r = diff_days/7
    elif (freq=='monthly'):
        r = diff_days/30
    elif (freq=='yearly'):
        r = diff_days/365
    else:
        r = diff_days
    for k in out_val:
        out_val[k] = out_val[k]/r
        out[k] = out_val[k]
    return out

In [ ]:
def getFrequenciesCounts(tab, start_day, end_day, freq='daily'):
    date_range = [i for i in pd.date_range(start_day, end_day).values if i in tab.index.values]
    out = create_categories_map(tab)
    if (len(date_range)==0):
        return out
    transaction_range = tab.loc[date_range]
    out_val = transaction_range['Category'].value_counts().to_dict()
    diff_days = (end_day - start_day).days
    if (diff_days==0):
        diff_days = 1
        
    if (freq == 'weekly'):
        r = diff_days/7
    elif (freq=='monthly'):
        r = diff_days/30
    elif (freq=='yearly'):
        r = diff_days/365
    else:
        r = diff_days
    for k in out_val:
        out_val[k] = out_val[k]/r
        out[k] = out_val[k]
    return out

In [ ]:
getFrequenciesCounts(df, x, to)

In [ ]:
def getOccurrencies(tab, start_day, end_day, freq='daily'):
    transaction_range = tab.loc[[i for i in pd.date_range(start_day, end_day).values if i in tab.index.values]]
    out = transaction_range['Category'].value_counts().to_dict()
    return out

In [ ]:
(test_date+dt.timedelta(days=2)).weekday()

In [ ]:
def getFrequencyDate(time, freq='daily'):
    """returns the start and end dates based on the given frequency
    possible freq = 'daily', 'weekly', 'monthly', 'yearly' """
    if (freq=='weekly'):
        start = time - dt.timedelta(days=time.weekday())
        end = start + dt.timedelta(days=6)
    elif (freq=='monthly'):
        start = time.replace(day=1)
        start_day, end_day = calendar.monthrange(time.year,time.month)
        end = time.replace(day=end_day)
    elif (freq=='yearly'):
        start = time.replace(month=1).replace(day=1)
        end = time.replace(month=12).replace(day=31)
    else:
        start = time
        end = time
    return start, end

In [ ]:
x = pd.to_datetime(to)

In [ ]:
overall_f = full_category_frequencies(df,'weekly')

In [ ]:
overall_f

In [ ]:
def full_category_frequencies(tab, freq='Daily'):
    return getFrequencies(tab, min(tab.index), max(tab.index), freq)

In [ ]:
def full_category_frequencies_counts(tab, freq='Daily'):
    return getFrequenciesCounts(tab, min(tab.index), max(tab.index), freq)

In [ ]:
df[df['Description'] == 'Tea Cory Hall']

In [ ]:
df.index

In [ ]:
def create_categories_map(tab, label='Category'):
    cat_set = set(tab[label])
    out = dict()
    for c in cat_set:
        out[c] = 0
    return out

In [ ]:
getFrequencies(df, pd.to_datetime("2017-08-01"),pd.to_datetime("2017-09-01"),'weekly')

In [ ]:
full_category_frequencies(df, 'weekly')

In [ ]:
df[df['Category']=='Credit Card Payment']

In [ ]:
data = pd.read_excel('age.xls')

In [ ]:
data.head()

In [ ]: