In [15]:
import pandas as pd
# import matplotlib.pyplot as plt
# %matplotlib inline
In [24]:
# Read the Android and iOS CSV exports into one DataFrame each.
df_android, df_ios = (
    pd.read_csv(csv_path) for csv_path in ('android_1gb.csv', 'ios_1gb.csv')
)
In [27]:
# Columns whose most frequent values are printed by return_counts().
readables = [
    'AppSiteCategory',
    'Position',
    'Age',
    'Gender',
    'DeviceType',
    'CreativeCategory',
]

# Distinct Country values seen across both platforms, in order of first
# appearance (Android rows first). Only the two Country columns are
# concatenated, so the full frames are never copied into one combined frame.
countries = pd.concat(
    [df_android['Country'], df_ios['Country']], ignore_index=True
).unique()

# Further column names jotted down in the original notebook; they are not part
# of `readables` (presumably candidates for later analysis - TODO confirm):
# Country?
# OS
# Outcome
# TrafficType
# Model
# Carrier
# GeoType
# Manufacturer
In [30]:
def cleanup(df):
    """Return a cleaned version of ``df`` without modifying ``df`` itself.

    Steps, in order:

    1. ``Timestamp`` is converted from epoch seconds to pandas datetimes.
    2. ``Outcome`` codes are made numeric: ``'c'`` becomes 1 and ``'w'``
       becomes 0; any other value is passed through unchanged.
    3. Rows are dropped when ``Gender``, ``OS`` or ``CreativeCategory`` equals
       the string ``'0'``, or when ``Age`` equals the integer ``0``
       (presumably placeholders for "unknown" - TODO confirm).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Timestamp``, ``Outcome``, ``Gender``,
        ``Age``, ``OS`` and ``CreativeCategory``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns; the original row labels of the
        surviving rows are kept (the index is not reset).
    """
    # df = df.drop(['Unnamed: 0'], axis=1)

    # assign() builds a new frame, so the caller's DataFrame is left untouched
    # and calling cleanup() again on the same raw frame gives the same result.
    converted = df.assign(
        Timestamp=pd.to_datetime(df['Timestamp'], unit='s'),
        Outcome=df['Outcome'].replace({'c': 1, 'w': 0}),
    )

    # Note the mixed types: Age is compared with the integer 0, the other
    # columns with the string '0'.
    keep = (
        (converted['Gender'] != '0')
        & (converted['Age'] != 0)
        & (converted['OS'] != '0')
        & (converted['CreativeCategory'] != '0')
    )
    return converted[keep]
def resample_bid(df):
    """Sum ``ExchangeBid`` over consecutive 5-minute windows of ``Timestamp``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-like ``Timestamp`` column and a numeric
        ``ExchangeBid`` column.

    Returns
    -------
    pandas.DataFrame
        A single-column frame (``ExchangeBid``) indexed by the start of each
        5-minute window.
    """
    bids = df.set_index('Timestamp')[['ExchangeBid']]
    # The `how=` keyword of resample() has been removed from pandas; the
    # aggregation is now called on the Resampler object. '5min' is the
    # non-deprecated spelling of the '5T' frequency alias.
    return bids.resample('5min').sum()
def return_counts(df=None, columns=None, top=5):
    """Print the most frequent values of selected columns.

    Despite its name, this function returns nothing; it only prints.

    Parameters
    ----------
    df : pandas.DataFrame, optional
        Frame to summarise. When omitted, a notebook-level variable named
        ``df`` is used, which is what the original zero-argument version
        relied on.
    columns : iterable of str, optional
        Columns to report. Defaults to the notebook-level ``readables`` list.
    top : int, default 5
        Number of most frequent values shown per column.

    Raises
    ------
    NameError
        If ``df`` is omitted and no notebook-level ``df`` exists.
    """
    if df is None:
        # Backward-compatible fallback for callers that still use return_counts().
        df = globals().get('df')
        if df is None:
            raise NameError(
                "return_counts() needs a DataFrame: pass one explicitly, "
                "e.g. return_counts(df_android)"
            )
    if columns is None:
        columns = readables

    for item in columns:
        print(item + ": \n", df[item].value_counts().head(top), "\n" + "=" * 15)
def corr_bidwins_category(df):
    """Mean ``ExchangeBid`` per ``CreativeCategory``, sorted ascending by mean.

    Note that, despite the name, no correlation is computed: the result is a
    plain per-category average.

    The original implementation first sorted the whole frame by ``Country``
    and built a (Country, CreativeCategory) MultiIndex before grouping on the
    category level. Neither step influences a per-category mean, so the
    grouping is done directly on the column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``CreativeCategory`` and a numeric ``ExchangeBid`` column.

    Returns
    -------
    pandas.DataFrame
        One ``ExchangeBid`` column holding the mean bid, indexed by
        ``CreativeCategory`` and ordered from lowest to highest mean.
    """
    mean_bid_by_category = df.groupby('CreativeCategory')[['ExchangeBid']].mean()
    return mean_bid_by_category.sort_values(by='ExchangeBid')
In [28]:
# Rebind both names to the frames returned by cleanup(). After this cell the
# raw frames are no longer reachable under these names, so re-running it feeds
# already-cleaned data back into cleanup().
df_android = cleanup(df_android)
df_ios = cleanup(df_ios)
In [31]:
# Mean ExchangeBid per CreativeCategory for the Android frame only
# (df_ios is not analysed here).
df_con_corr = corr_bidwins_category(df_android)
# Disabled bar chart of the per-category means:
# df_con_corr.plot(figsize=(16,8), kind='bar', linewidth=2)
In [32]:
# Show the per-category table as the cell output.
df_con_corr
Out[32]: