notebook.community

Edit and run



In [1]:

    
from googleads import adwords



In [2]:

    
import time



In [3]:

    
from StringIO import StringIO
import pandas as pd
import numpy as np



In [73]:

    
a = ad_quality('us', '20170801', '20170915')



In [74]:

    
a.get_accounts()



In [75]:

    
x = a.get_all_enabled_ads(4403620381)



In [76]:

    
z = a.ad_ranking(4403620381)









    



filtered 47474 adgroups with less than or equal to 6 ads



In [71]:

    
#z[z.Impressions>100]



In [80]:

    
z[z['Ad group ID']==24648224311].sort_values('ranking')[['Ad ID',
                            'Ad type', 'Impressions',
                            'Clicks','Cost', 'imp_share', 'ctr', 'std', 'ctr_adjusted', 'ranking']]









    Out[80]:







  
    
      
      Ad ID
      Ad type
      Impressions
      Clicks
      Cost
      imp_share
      ctr
      std
      ctr_adjusted
      ranking
    
  
  
    
      418517
      162158060825
      Expanded text ad
      7504
      976
      507760000
      0.878174
      0.130064
      0.003883
      0.126181
      1.0
    
    
      418509
      85918412191
      Text ad
      9
      2
      1340000
      0.001053
      0.222222
      0.138580
      0.083642
      2.0
    
    
      418516
      162158060816
      Expanded text ad
      773
      63
      29110000
      0.090462
      0.081501
      0.009841
      0.071660
      3.0
    
    
      418514
      162158060828
      Expanded text ad
      54
      6
      3760000
      0.006319
      0.111111
      0.042767
      0.068344
      4.0
    
    
      418507
      85918412551
      Text ad
      3
      1
      640000
      0.000351
      0.333333
      0.272166
      0.061168
      5.0
    
    
      418510
      162158060819
      Expanded text ad
      12
      2
      1350000
      0.001404
      0.166667
      0.107583
      0.059084
      6.0
    
    
      418512
      85918412071
      Text ad
      30
      2
      680000
      0.003511
      0.066667
      0.045542
      0.021125
      7.0
    
    
      418508
      162158060831
      Expanded text ad
      6
      1
      640000
      0.000702
      0.166667
      0.152145
      0.014522
      8.0
    
    
      418513
      85918411951
      Text ad
      47
      2
      1560000
      0.005500
      0.042553
      0.029442
      0.013111
      9.0
    
    
      418511
      162158060822
      Expanded text ad
      27
      1
      620000
      0.003160
      0.037037
      0.036345
      0.000692
      10.0
    
    
      418505
      85918412671
      Text ad
      0
      0
      0
      0.000000
      0.000000
      0.000000
      0.000000
      11.0
    
    
      418506
      85918412431
      Text ad
      1
      0
      0
      0.000117
      0.000000
      0.000000
      0.000000
      11.0
    
    
      418515
      85918412311
      Text ad
      79
      0
      0
      0.009245
      0.000000
      0.000000
      0.000000
      11.0



In [65]:

    
class ad_quality(object):
    def __init__(self, country, start, end, version='v201705', max_ads=6):
        self._client = adwords.AdWordsClient.LoadFromStorage()
        self._country = country
        self._page_size = 500
        self._max_ads = max_ads
        self._start = start
        self._end = end
        self._active = 20170718
        self._version = version
        self._all_accounts = {}
        self._new_accounts_df = None
        self._max_operations = 3000
        self._current_account = None
        self._current_ads = None
        
    
    def get_accounts(self):
        managed_customer_service = self._client.GetService('ManagedCustomerService', version=self._version)
        selector = {
            'fields': ['CustomerId', 'Name'],
            'predicates': [{'field' : 'ExcludeHiddenAccounts',
                            'operator' : 'EQUALS' ,
                            'values' : 'true'
                            }]
        }    
        account_ids = []
        page = managed_customer_service.get(selector)
        if 'entries' in page and page['entries']:
            for account in page['entries']:
                if str(account['name']).startswith(self._country + '_') and 'jobseeker' in str(account['name']) \
                    and '_aed_' not in str(account['name']) and '_app' not in str(account['name'])\
                    and '_disabled' not in str(account['name']) and '_dsa' not in str(account['name'])\
                    and '_display_' not in str(account['name']) and '_content_' not in str(account['name']):               
                    self._all_accounts[account['customerId']] = account['name']
            
    def get_all_enabled_ads(self, account_id):
        self._client.SetClientCustomerId(account_id)
        report_downloader = self._client.GetReportDownloader(version=self._version)
        qry = ('SELECT CampaignId, AdGroupId, Id, AdType, Impressions, Clicks, Cost FROM '
               'AD_PERFORMANCE_REPORT WHERE Status IN [ENABLED] AND CampaignStatus in [ENABLED] '
               'AND AdGroupStatus in [ENABLED] DURING {0}, {1}').format(self._start, self._end)  
        stream_data = report_downloader.DownloadReportAsStringWithAwql(qry, 'CSV',
                skip_report_header=True, skip_column_header=False,
                skip_report_summary=True, include_zero_impressions=True)
        report_data = StringIO(stream_data)
        report_df = pd.DataFrame.from_csv(report_data, sep=',').reset_index()
        report_df = report_df.sort_values('Impressions', ascending=True)
        return report_df
    
    def sample_ad_ranking(self, account_id=4403620381, campaign_id=349931071, ad_group_id=24639079711):
        ads = self.get_all_enabled_ads(account_id)
        y = ads[(ads['Campaign ID']==campaign_id)&(ads['Ad group ID']==ad_group_id)]
        y['share'] = y['Impressions']*1.0/y['Impressions'].sum()
        y['ctr'] = y['Clicks']*1.0/y['Impressions']
        y = y.fillna(0)
        y['std'] = (y['ctr']*(1-y['ctr']))/y['Impressions']
        y = y.fillna(0)
        y['std'] = y['std'].pow(1./2)
        y['ctr_adjusted'] = y['ctr'] - y['std']
        return y.sort_values('ctr_adjusted', ascending=False)
    
    def filter_adgroup_with_enough_ads(self, data):
        max_ads = self._max_ads
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        passed = count_ads[count_ads['Ad ID']>max_ads][['Campaign ID', 'Ad group ID']]
        print ("filtered {0} adgroups with less than or equal to {1} "
               "ads").format(len(count_ads[count_ads['Ad ID']<=max_ads]), max_ads)
        return pd.merge(passed, data, how='left', on=['Campaign ID', 'Ad group ID'])

    def include_impression_share(self, data):
        sum_impressions = data.groupby(['Campaign ID', 'Ad group ID'])['Impressions'].sum().reset_index()   
        sum_impressions.columns = ['Campaign ID', 'Ad group ID', 'Total Impressions']
        # add info 
        with_info = pd.merge(data, sum_impressions, how='left', on=['Campaign ID', 'Ad group ID'])
        # impression share
        with_info['imp_share'] = with_info['Impressions']/with_info['Total Impressions']
        with_info = with_info.fillna(0)
        return with_info    
    
    def ad_ranking(self, account_id, metric='ctr_adjusted'):
        y = self.get_all_enabled_ads(account_id)
        y = self.filter_adgroup_with_enough_ads(y)
        y = self.include_impression_share(y)
        # ctr
        y['ctr'] = y['Clicks']*1.0/y['Impressions']
        y = y.fillna(0)
        # standard deviation
        y['std'] = (y['ctr']*(1-y['ctr']))/y['Impressions']
        y = y.fillna(0)
        y['std'] = y['std'].pow(1./2)
        # adjusted ctr
        y['ctr_adjusted'] = y['ctr'] - y['std']  
        ranked = y.groupby(['Campaign ID', 'Ad group ID'])[metric].rank(method='min', ascending=False).to_frame()
        ranked.columns = ['ranking']
        return pd.merge(y, ranked, left_index=True, right_index=True)

                         
    def low_impression_share(self, account_id, campaign_id):
        data = self.get_ad_data(account_id, campaign_id)
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        sum_impressions = data.groupby(['Campaign ID', 'Ad group ID'])['Impressions'].sum().reset_index()   
        sum_impressions.columns = ['Campaign ID', 'Ad group ID', 'Total Impressions']
        #adgroup with greater than _max_ads
        filtered = count_ads[count_ads['Ad ID']>self._max_ads][['Campaign ID', 'Ad group ID']]
        #add info 
        with_info = pd.merge(filtered, data, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = pd.merge(with_info, sum_impressions, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks', 'Total Impressions']]
        #ctr & impression share
        with_info['ctr'] = with_info['Clicks']/with_info['Impressions']
        with_info['imp_share'] = with_info['Impressions']/with_info['Total Impressions']
        with_info = with_info.fillna(0)
        #rank
        share_rank = with_info.groupby(['Ad group ID'])['imp_share'].rank(ascending=False).to_frame()
        share_rank.columns = ['share_rank']
        #merged
        merged = pd.merge(with_info, share_rank, left_index=True, right_index=True)
        merged = merged.sort_values(['Ad group ID', 'share_rank'])
        return merged


    def ads_to_pause_low_ctr_high_std_dev(self, account_id, campaign_id):
        data = self.get_ad_data(account_id, campaign_id)
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        #adgroup with greater than _max_ads
        filtered = count_ads[count_ads['Ad ID']>self._max_ads][['Campaign ID', 'Ad group ID']]
        #add info 
        with_info = pd.merge(filtered, data, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks']]
        #ctr
        with_info['ctr'] = with_info['Clicks']/with_info['Impressions']
        with_info = with_info.fillna(0)
        with_info['std'] = np.sqrt((with_info['ctr']*(1-with_info['ctr']))/with_info['Impressions'])
        with_info = with_info.fillna(0)
        with_info['ctr_adjusted'] = with_info['ctr'] - with_info['std']
        #rank
        ranking = with_info.groupby(['Ad group ID'])['ctr_adjusted'].rank(ascending=False).to_frame()
        ranking.columns = ['ranking']
        #merged
        merged = pd.merge(with_info, ranking, left_index=True, right_index=True)
        #rank >= max_ads
        #TODO: handle ties
        to_be_paused = merged[(merged.ranking>=self._max_ads)]
        return zip(list(to_be_paused['Ad group ID'].values),list(to_be_paused['Ad ID'].values))   

        
    def ads_to_pause(self, account_id, campaign_id):
        data = self.get_ad_data(account_id, campaign_id)
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        #adgroup with greater than _max_ads
        filtered = count_ads[count_ads['Ad ID']>self._max_ads][['Campaign ID', 'Ad group ID']]
        #add info 
        with_info = pd.merge(filtered, data, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks']]
        #ctr
        with_info['ctr'] = with_info['Clicks']/with_info['Impressions']
        with_info = with_info.fillna(0)
        #rank
        ranking = with_info.groupby(['Ad group ID'])['ctr'].rank(ascending=False).to_frame()
        ranking.columns = ['ranking']
        #merged
        merged = pd.merge(with_info, ranking, left_index=True, right_index=True)
        #rank >= max_ads & impressions > 100
        to_be_paused = merged[(merged.ranking>=self._max_ads)&(merged.Impressions>100)]
        return zip(list(to_be_paused['Ad group ID'].values),list(to_be_paused['Ad ID'].values))
    
    def pause_multiple_ads(self, account_id, df_data, verbose=False):
        self._client.SetClientCustomerId(account_id)
        ad_group_ad_service = self._client.GetService('AdGroupAdService', version=self._version)
        
        # construct operations
        operations = []
        for i, j in zip(df_data['Ad group ID'], df_data['Ad ID']):
            operations.append({'operator': 'SET', 
                               'operand': {'adGroupId': i,
                                           'ad': {'id': j},
                                           'status': 'PAUSED'}
                               })
        # start pausing
        for i in xrange(0, len(operations), self._max_operations):
            tmp_operations = operations[i:i+self._max_operations]
            result = ad_group_ad_service.mutate(tmp_operations)
            if verbose:
                for ad in result['value']:
                    print ('Ad with id "{0}" and type "{1}" was paused.'.format(ad['ad']['id'], ad['ad']['Ad.Type']))



In [ ]:

	Ad ID	Ad type	Impressions	Clicks	Cost	imp_share	ctr	std	ctr_adjusted	ranking
418517	162158060825	Expanded text ad	7504	976	507760000	0.878174	0.130064	0.003883	0.126181	1.0
418509	85918412191	Text ad	9	2	1340000	0.001053	0.222222	0.138580	0.083642	2.0
418516	162158060816	Expanded text ad	773	63	29110000	0.090462	0.081501	0.009841	0.071660	3.0
418514	162158060828	Expanded text ad	54	6	3760000	0.006319	0.111111	0.042767	0.068344	4.0
418507	85918412551	Text ad	3	1	640000	0.000351	0.333333	0.272166	0.061168	5.0
418510	162158060819	Expanded text ad	12	2	1350000	0.001404	0.166667	0.107583	0.059084	6.0
418512	85918412071	Text ad	30	2	680000	0.003511	0.066667	0.045542	0.021125	7.0
418508	162158060831	Expanded text ad	6	1	640000	0.000702	0.166667	0.152145	0.014522	8.0
418513	85918411951	Text ad	47	2	1560000	0.005500	0.042553	0.029442	0.013111	9.0
418511	162158060822	Expanded text ad	27	1	620000	0.003160	0.037037	0.036345	0.000692	10.0
418505	85918412671	Text ad	0	0	0	0.000000	0.000000	0.000000	0.000000	11.0
418506	85918412431	Text ad	1	0	0	0.000117	0.000000	0.000000	0.000000	11.0
418515	85918412311	Text ad	79	0	0	0.009245	0.000000	0.000000	0.000000	11.0