In [15]:
import pandas as pd
# import matplotlib.pyplot as plt
# %matplotlib inline

In [24]:
# Load the raw Android and iOS extracts; both CSV paths are relative to the
# notebook's working directory.
df_android = pd.read_csv(filepath_or_buffer='android_1gb.csv')
df_ios = pd.read_csv(filepath_or_buffer='ios_1gb.csv')

In [27]:
# Columns whose most frequent values are printed by `return_counts`.
readables = [
    'AppSiteCategory',
    'Position',
    'Age',
    'Gender',
    'DeviceType',
    'CreativeCategory',
]

# Other columns noted here but not included in `readables`
# (presumably candidates for later inspection):
# Country, OS, Outcome, TrafficType, Model, Carrier, GeoType, Manufacturer

# Distinct 'Country' values across both platforms, in order of first appearance.
# Only the 'Country' column of each frame is concatenated: stacking the two full
# frames just to read one column would copy every other column for no benefit.
countries = pd.concat(
    [df_android['Country'], df_ios['Country']],
    ignore_index=True,
).unique()

In [30]:
def cleanup(df):
    """Return a cleaned version of a bid-log frame, leaving ``df`` untouched.

    The result is built on a new frame, so the caller's DataFrame keeps its
    original ``Timestamp`` and ``Outcome`` values.

    Transformations applied:
      * ``Timestamp`` is parsed from epoch seconds into datetimes.
      * ``Outcome`` codes are recoded: ``'c'`` -> 1 and ``'w'`` -> 0; any other
        value is left as it is.
      * Rows are dropped when ``Gender == '0'``, ``Age == 0``, ``OS == '0'`` or
        ``CreativeCategory == '0'`` (presumably placeholders for unknown
        values -- confirm against the data dictionary).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Timestamp``, ``Outcome``, ``Gender``,
        ``Age``, ``OS`` and ``CreativeCategory``.

    Returns
    -------
    pandas.DataFrame
        The converted and filtered rows, keeping their original index labels.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # `assign` returns a new frame, so the conversions never write into `df`.
    converted = df.assign(
        Timestamp=pd.to_datetime(df['Timestamp'], unit='s'),
        Outcome=df['Outcome'].replace({'c': 1, 'w': 0}),
    )

    # One combined mask instead of three successive filtered copies.
    keep = (
        (converted['Gender'] != '0')
        & (converted['Age'] != 0)
        & (converted['OS'] != '0')
        & (converted['CreativeCategory'] != '0')
    )
    return converted[keep]

def resample_bid(df):
    """Sum ``ExchangeBid`` over consecutive 5-minute windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-like ``Timestamp`` column and a numeric
        ``ExchangeBid`` column. The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        A single-column frame (``ExchangeBid``) indexed by the start of each
        5-minute bin.
    """
    bids = df.set_index('Timestamp')[['ExchangeBid']]
    # Aggregate via the resampler method: the old `how=` keyword is not
    # accepted by current pandas, and '5min' replaces the deprecated '5T' alias.
    return bids.resample('5min').sum()

def return_counts(df=None, columns=None):
    """Print the five most frequent values of each selected column.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to summarise. When omitted, a module-level variable named ``df``
        is used if one exists, which keeps the old zero-argument call working.
    columns : iterable of str, optional
        Column names to report. Defaults to the module-level ``readables`` list.

    Returns
    -------
    None
        The counts are printed, one block per column, each followed by a
        ``===============`` separator line.

    Raises
    ------
    NameError
        If no frame is passed and no module-level ``df`` exists.
    KeyError
        If a requested column is not present in the frame.
    """
    if df is None:
        # Backward compatibility: the function used to read a global `df`.
        try:
            df = globals()['df']
        except KeyError:
            raise NameError(
                "return_counts() needs a DataFrame: pass one explicitly, "
                "e.g. return_counts(df_android)"
            ) from None
    if columns is None:
        columns = readables

    for item in columns:
        # head(5) is purely positional, unlike a [:5] slice on the counts.
        print(item + ": \n", df[item].value_counts().head(5), "\n" + "=" * 15)
        
def corr_bidwins_category(df):
    """Average ``ExchangeBid`` for each ``CreativeCategory``, lowest first.

    Despite the name, no correlation coefficient is computed: the result is
    the mean bid per creative category, pooled over all countries.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Country``, ``CreativeCategory`` and ``ExchangeBid``.

    Returns
    -------
    pandas.DataFrame
        One ``ExchangeBid`` column indexed by ``CreativeCategory``, sorted in
        ascending order of the mean bid.
    """
    bids = df.loc[:, ['Country', 'CreativeCategory', 'ExchangeBid']]
    bids = bids.sort_values(by='Country')
    bids = bids.set_index(['Country', 'CreativeCategory'])

    # Group on the second index level only, so countries are averaged together.
    mean_bid = bids.groupby(level='CreativeCategory').mean()
    return mean_bid.sort_values(by='ExchangeBid')

In [28]:
# Replace each raw frame with its cleaned version (see `cleanup`).
df_android = df_android.pipe(cleanup)
df_ios = df_ios.pipe(cleanup)

Correlation between bid winnings and creative category (mean ExchangeBid per CreativeCategory)


In [31]:
# Mean ExchangeBid per CreativeCategory for the Android data.
df_con_corr = df_android.pipe(corr_bidwins_category)
# Optional bar chart of the result (requires matplotlib):
# df_con_corr.plot(figsize=(16,8), kind='bar', linewidth=2)

In [32]:
# Display the per-category mean bids, sorted in ascending order of ExchangeBid.
df_con_corr


Out[32]:
ExchangeBid
CreativeCategory
Air Travel#Business Travel 0.019854
Marketing 0.024065
Comparison#Couponing 0.056542
Financial Planning#Investing 0.059482
Cocktails/Beer#Wine 0.061391
Board Games/Puzzles#Card Games 0.065000
Stocks#Options 0.078119
Internet Technology 0.086711
Advertising#Cell Phones 0.102780
Computer Peripherals#Advertising 0.113724
Home Theater#Appliances 0.130659
Credit/Debt & Loans#Financial Planning 0.140000
Cell Phones#Internet Technology 0.152847
Coffee/Tea#Chinese Cuisine 0.154670
Comparison 0.157803
Buying/Selling Cars#Performance Vehicles 0.160501
Insurance#Investing 0.180754
Advertising 0.185077
Beauty 0.194239
Vegetarian#Wine#Comparison 0.200123
American Cuisine#Barbecues & Grilling 0.206667
American Cuisine#Barbecues & Grilling#Wine#Vegetarian#Dining Out#Desserts & Baking 0.222687
Budget Travel#Business Travel 0.245000
Couponing 0.246375
Advertising#Couponing 0.250147
Investing 0.253750
Television 0.257979
Pickup 0.301208
Fashion#Accessories#Comparison 0.311360
Buying/Selling Cars#Car Culture 0.367478
... ...
Budget Travel#Business Travel#Traveling with Kids 0.778942
Air Travel 0.780612
Buying/Selling Cars 0.795000
Options 0.831000
Advertising#Comparison 0.867983
Coffee/Tea#Dining Out 0.870756
Movies#Celebrity Fan/Gossip 1.082197
Auto Repair#Advertising 1.120000
Hotels#Air Travel 1.376667
Society#Comparison#Accessories 1.410000
Advertising#Appliances 1.536864
Honeymoons/Getaways#Hotels 1.755715
Clothing#Accessories#Comparison#Society 1.785447
Roleplaying Games#Sci-Fi & Fantasy 1.880161
Comparison#Couponing#Clothing#Fashion#Accessories 1.882773
Board Games/Puzzles#Roleplaying Games 2.151724
Couponing#Comparison#Accessories 2.430313
Board Games/Puzzles 2.507437
Cell Phones#Comparison 2.673899
Roleplaying Games#Board Games/Puzzles 2.679379
Movies 2.826809
Accessories 3.114848
Society#Internet Technology 3.916186
Comparison#Couponing#Accessories 3.935802
Comparison#Fashion 4.463784
Roleplaying Games 4.540523
Cell Phones#By US Locale 4.970326
Video & Computer Games#Roleplaying Games 4.980000
Comparison#Clothing 7.756160
Card Games 7.827262

76 rows × 1 columns