In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [2]:
# Load the search-terms CSV into `df`; the path points into the current user's ~/Downloads folder.
df = pd.read_csv('~/Downloads/search-terms.csv')
In [3]:
# Show the first row of the freshly loaded frame.
df.head(1)
Out[3]:
In [ ]:
In [4]:
# Show the first row of the frame again.
# NOTE(review): this repeats the previous preview cell — confirm whether it is still needed.
df.head(1)
Out[4]:
In [5]:
# Remove thousands separators (",") from Impressions so the column can later be parsed as a number.
# The vectorised .str accessor passes missing values through unchanged, whereas calling
# str.replace on each element raises as soon as it meets a non-string (e.g. NaN).
df['Impressions'] = df['Impressions'].str.replace(',', '', regex=False)
In [6]:
# Remove thousands separators (",") from Clicks so the column can later be parsed as a number.
# The vectorised .str accessor passes missing values through unchanged, whereas calling
# str.replace on each element raises as soon as it meets a non-string (e.g. NaN).
df['Clicks'] = df['Clicks'].str.replace(',', '', regex=False)
In [7]:
# Drop the trailing "%" from CTR, leaving the bare number as text.
# The vectorised .str accessor passes missing values through unchanged, whereas calling
# str.rstrip on each element raises as soon as it meets a non-string (e.g. NaN).
df['CTR'] = df['CTR'].str.rstrip('%')
In [8]:
# Parse the Impressions text column into a numeric dtype.
df['Impressions'] = pd.to_numeric(df['Impressions'])
In [9]:
# Parse the Clicks text column into a numeric dtype.
df['Clicks'] = pd.to_numeric(df['Clicks'])
In [10]:
# Keep only rows whose CTR text does not contain "<"; such values cannot be parsed as a number.
# na=True treats a missing CTR as a match, so those rows are dropped as well
# (the same outcome as comparing the raw mask against False).
# .copy() makes the result an independent frame, so later column assignments on `df`
# do not raise SettingWithCopyWarning.
df = df[~df['CTR'].str.contains('<', regex=False, na=True)].copy()
In [11]:
# Parse the CTR text column into a numeric dtype.
# assign() builds a new frame instead of writing into `df` in place, which avoids
# SettingWithCopyWarning when `df` is the result of an earlier row filter.
# The CTR column keeps its position in the frame.
df = df.assign(CTR=pd.to_numeric(df['CTR']))
In [12]:
# Open a new, empty matplotlib figure.
# NOTE(review): the scatter cell that follows calls df.plot without an `ax` argument, which
# presumably creates its own figure and leaves this one blank — confirm whether this cell is needed.
plt.figure()
Out[12]:
In [13]:
# Scatter plot of CTR against average position, one point per row.
df.plot.scatter(x='Average Position', y='CTR')
Out[13]:
In [14]:
# Summary statistics for the columns of the cleaned frame.
df.describe()
Out[14]:
In [15]:
# 75th percentile of CTR.
df['CTR'].quantile(0.75)
Out[15]:
In [16]:
# Regression plot of CTR on average position, restricted to rows with Clicks > 0.
# The x values are grouped into 50 bins for display, and the fit is y ~ log(x).
sns.regplot(
    data=df.loc[df['Clicks'] > 0],
    x='Average Position',
    y='CTR',
    x_bins=50,
    logx=True,
)
Out[16]:
In [17]:
# All rows ordered from most to fewest clicks.
df.sort_values(by='Clicks', ascending=False)
Out[17]:
In [18]:
# Rows with an average position below 2, ordered from most to fewest clicks.
# Index labels 3 and 104 are excluded by hand.
# NOTE(review): the reason for excluding them is not recorded here — presumably outliers; confirm.
# errors='ignore' stops the cell raising KeyError when either label is absent from the
# filtered rows (e.g. after the input file changes).
(
    df[df['Average Position'] < 2]
    .drop([3, 104], axis=0, errors='ignore')
    .sort_values('Clicks', ascending=False)
)
Out[18]:
In [19]:
# Rows whose average position is at least 10 and strictly below 12,
# ordered from most to fewest clicks.
(
    df.loc[df['Average Position'].ge(10) & df['Average Position'].lt(12)]
    .sort_values(by='Clicks', ascending=False)
)
Out[19]:
In [24]:
# Flag each query: 1 if its text contains the substring "tree" anywhere, else 0.
# A substring test is required here: str.find returns the match position (or -1), so
# comparing that result with True only matches when "tree" starts at index 1.
# The match is case-sensitive; missing queries are flagged 0.
df['Trees'] = df['Query'].str.contains('tree', regex=False, na=False).astype(int)
In [26]:
# Count how many rows carry each value of the Trees flag.
df['Trees'].value_counts()
Out[26]:
In [27]:
# Summary statistics again, now including the Trees column added above.
df.describe()
Out[27]:
In [31]:
# Scatter plot of CTR against Impressions for the rows at positions 5 through 99.
df.iloc[5:100].plot.scatter(x='Impressions', y='CTR')
Out[31]:
In [42]:
# Export rows with more than 25000 impressions and a CTR below 5.0,
# ordered by average position, to a CSV file without the index column.
(
    df.loc[(df['Impressions'] > 25000) & (df['CTR'] < 5.0)]
    .sort_values(by='Average Position')
    .to_csv('High Impressions, Low CTR', index=False)
)
In [ ]: