In [1]:
from googleads import adwords

In [2]:
import time

In [3]:
from StringIO import StringIO
import pandas as pd
import numpy as np

In [73]:
a = ad_quality('us', '20170801', '20170915')

In [74]:
a.get_accounts()

In [75]:
x = a.get_all_enabled_ads(4403620381)

In [76]:
z = a.ad_ranking(4403620381)


filtered 47474 adgroups with less than or equal to 6 ads

In [71]:
#z[z.Impressions>100]

In [80]:
z[z['Ad group ID']==24648224311].sort_values('ranking')[['Ad ID',
                            'Ad type', 'Impressions',
                            'Clicks','Cost', 'imp_share', 'ctr', 'std', 'ctr_adjusted', 'ranking']]


Out[80]:
Ad ID Ad type Impressions Clicks Cost imp_share ctr std ctr_adjusted ranking
418517 162158060825 Expanded text ad 7504 976 507760000 0.878174 0.130064 0.003883 0.126181 1.0
418509 85918412191 Text ad 9 2 1340000 0.001053 0.222222 0.138580 0.083642 2.0
418516 162158060816 Expanded text ad 773 63 29110000 0.090462 0.081501 0.009841 0.071660 3.0
418514 162158060828 Expanded text ad 54 6 3760000 0.006319 0.111111 0.042767 0.068344 4.0
418507 85918412551 Text ad 3 1 640000 0.000351 0.333333 0.272166 0.061168 5.0
418510 162158060819 Expanded text ad 12 2 1350000 0.001404 0.166667 0.107583 0.059084 6.0
418512 85918412071 Text ad 30 2 680000 0.003511 0.066667 0.045542 0.021125 7.0
418508 162158060831 Expanded text ad 6 1 640000 0.000702 0.166667 0.152145 0.014522 8.0
418513 85918411951 Text ad 47 2 1560000 0.005500 0.042553 0.029442 0.013111 9.0
418511 162158060822 Expanded text ad 27 1 620000 0.003160 0.037037 0.036345 0.000692 10.0
418505 85918412671 Text ad 0 0 0 0.000000 0.000000 0.000000 0.000000 11.0
418506 85918412431 Text ad 1 0 0 0.000117 0.000000 0.000000 0.000000 11.0
418515 85918412311 Text ad 79 0 0 0.009245 0.000000 0.000000 0.000000 11.0

In [65]:
class ad_quality(object):
    def __init__(self, country, start, end, version='v201705', max_ads=6):
        self._client = adwords.AdWordsClient.LoadFromStorage()
        self._country = country
        self._page_size = 500
        self._max_ads = max_ads
        self._start = start
        self._end = end
        self._active = 20170718
        self._version = version
        self._all_accounts = {}
        self._new_accounts_df = None
        self._max_operations = 3000
        self._current_account = None
        self._current_ads = None
        
    
    def get_accounts(self):
        managed_customer_service = self._client.GetService('ManagedCustomerService', version=self._version)
        selector = {
            'fields': ['CustomerId', 'Name'],
            'predicates': [{'field' : 'ExcludeHiddenAccounts',
                            'operator' : 'EQUALS' ,
                            'values' : 'true'
                            }]
        }    
        account_ids = []
        page = managed_customer_service.get(selector)
        if 'entries' in page and page['entries']:
            for account in page['entries']:
                if str(account['name']).startswith(self._country + '_') and 'jobseeker' in str(account['name']) \
                    and '_aed_' not in str(account['name']) and '_app' not in str(account['name'])\
                    and '_disabled' not in str(account['name']) and '_dsa' not in str(account['name'])\
                    and '_display_' not in str(account['name']) and '_content_' not in str(account['name']):               
                    self._all_accounts[account['customerId']] = account['name']
            
    def get_all_enabled_ads(self, account_id):
        self._client.SetClientCustomerId(account_id)
        report_downloader = self._client.GetReportDownloader(version=self._version)
        qry = ('SELECT CampaignId, AdGroupId, Id, AdType, Impressions, Clicks, Cost FROM '
               'AD_PERFORMANCE_REPORT WHERE Status IN [ENABLED] AND CampaignStatus in [ENABLED] '
               'AND AdGroupStatus in [ENABLED] DURING {0}, {1}').format(self._start, self._end)  
        stream_data = report_downloader.DownloadReportAsStringWithAwql(qry, 'CSV',
                skip_report_header=True, skip_column_header=False,
                skip_report_summary=True, include_zero_impressions=True)
        report_data = StringIO(stream_data)
        report_df = pd.DataFrame.from_csv(report_data, sep=',').reset_index()
        report_df = report_df.sort_values('Impressions', ascending=True)
        return report_df
    
    def sample_ad_ranking(self, account_id=4403620381, campaign_id=349931071, ad_group_id=24639079711):
        ads = self.get_all_enabled_ads(account_id)
        y = ads[(ads['Campaign ID']==campaign_id)&(ads['Ad group ID']==ad_group_id)]
        y['share'] = y['Impressions']*1.0/y['Impressions'].sum()
        y['ctr'] = y['Clicks']*1.0/y['Impressions']
        y = y.fillna(0)
        y['std'] = (y['ctr']*(1-y['ctr']))/y['Impressions']
        y = y.fillna(0)
        y['std'] = y['std'].pow(1./2)
        y['ctr_adjusted'] = y['ctr'] - y['std']
        return y.sort_values('ctr_adjusted', ascending=False)
    
    def filter_adgroup_with_enough_ads(self, data):
        max_ads = self._max_ads
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        passed = count_ads[count_ads['Ad ID']>max_ads][['Campaign ID', 'Ad group ID']]
        print ("filtered {0} adgroups with less than or equal to {1} "
               "ads").format(len(count_ads[count_ads['Ad ID']<=max_ads]), max_ads)
        return pd.merge(passed, data, how='left', on=['Campaign ID', 'Ad group ID'])

    def include_impression_share(self, data):
        sum_impressions = data.groupby(['Campaign ID', 'Ad group ID'])['Impressions'].sum().reset_index()   
        sum_impressions.columns = ['Campaign ID', 'Ad group ID', 'Total Impressions']
        # add info 
        with_info = pd.merge(data, sum_impressions, how='left', on=['Campaign ID', 'Ad group ID'])
        # impression share
        with_info['imp_share'] = with_info['Impressions']/with_info['Total Impressions']
        with_info = with_info.fillna(0)
        return with_info    
    
    def ad_ranking(self, account_id, metric='ctr_adjusted'):
        y = self.get_all_enabled_ads(account_id)
        y = self.filter_adgroup_with_enough_ads(y)
        y = self.include_impression_share(y)
        # ctr
        y['ctr'] = y['Clicks']*1.0/y['Impressions']
        y = y.fillna(0)
        # standard deviation
        y['std'] = (y['ctr']*(1-y['ctr']))/y['Impressions']
        y = y.fillna(0)
        y['std'] = y['std'].pow(1./2)
        # adjusted ctr
        y['ctr_adjusted'] = y['ctr'] - y['std']  
        ranked = y.groupby(['Campaign ID', 'Ad group ID'])[metric].rank(method='min', ascending=False).to_frame()
        ranked.columns = ['ranking']
        return pd.merge(y, ranked, left_index=True, right_index=True)

                         
    def low_impression_share(self, account_id, campaign_id):
        data = self.get_ad_data(account_id, campaign_id)
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        sum_impressions = data.groupby(['Campaign ID', 'Ad group ID'])['Impressions'].sum().reset_index()   
        sum_impressions.columns = ['Campaign ID', 'Ad group ID', 'Total Impressions']
        #adgroup with greater than _max_ads
        filtered = count_ads[count_ads['Ad ID']>self._max_ads][['Campaign ID', 'Ad group ID']]
        #add info 
        with_info = pd.merge(filtered, data, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = pd.merge(with_info, sum_impressions, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks', 'Total Impressions']]
        #ctr & impression share
        with_info['ctr'] = with_info['Clicks']/with_info['Impressions']
        with_info['imp_share'] = with_info['Impressions']/with_info['Total Impressions']
        with_info = with_info.fillna(0)
        #rank
        share_rank = with_info.groupby(['Ad group ID'])['imp_share'].rank(ascending=False).to_frame()
        share_rank.columns = ['share_rank']
        #merged
        merged = pd.merge(with_info, share_rank, left_index=True, right_index=True)
        merged = merged.sort_values(['Ad group ID', 'share_rank'])
        return merged


    def ads_to_pause_low_ctr_high_std_dev(self, account_id, campaign_id):
        data = self.get_ad_data(account_id, campaign_id)
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        #adgroup with greater than _max_ads
        filtered = count_ads[count_ads['Ad ID']>self._max_ads][['Campaign ID', 'Ad group ID']]
        #add info 
        with_info = pd.merge(filtered, data, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks']]
        #ctr
        with_info['ctr'] = with_info['Clicks']/with_info['Impressions']
        with_info = with_info.fillna(0)
        with_info['std'] = np.sqrt((with_info['ctr']*(1-with_info['ctr']))/with_info['Impressions'])
        with_info = with_info.fillna(0)
        with_info['ctr_adjusted'] = with_info['ctr'] - with_info['std']
        #rank
        ranking = with_info.groupby(['Ad group ID'])['ctr_adjusted'].rank(ascending=False).to_frame()
        ranking.columns = ['ranking']
        #merged
        merged = pd.merge(with_info, ranking, left_index=True, right_index=True)
        #rank >= max_ads
        #TODO: handle ties
        to_be_paused = merged[(merged.ranking>=self._max_ads)]
        return zip(list(to_be_paused['Ad group ID'].values),list(to_be_paused['Ad ID'].values))   

        
    def ads_to_pause(self, account_id, campaign_id):
        data = self.get_ad_data(account_id, campaign_id)
        count_ads = data.groupby(['Campaign ID', 'Ad group ID'])['Ad ID'].count().reset_index()
        #adgroup with greater than _max_ads
        filtered = count_ads[count_ads['Ad ID']>self._max_ads][['Campaign ID', 'Ad group ID']]
        #add info 
        with_info = pd.merge(filtered, data, how='left', on=['Campaign ID', 'Ad group ID'])
        with_info = with_info[['Ad group ID', 'Ad ID', 'Impressions', 'Clicks']]
        #ctr
        with_info['ctr'] = with_info['Clicks']/with_info['Impressions']
        with_info = with_info.fillna(0)
        #rank
        ranking = with_info.groupby(['Ad group ID'])['ctr'].rank(ascending=False).to_frame()
        ranking.columns = ['ranking']
        #merged
        merged = pd.merge(with_info, ranking, left_index=True, right_index=True)
        #rank >= max_ads & impressions > 100
        to_be_paused = merged[(merged.ranking>=self._max_ads)&(merged.Impressions>100)]
        return zip(list(to_be_paused['Ad group ID'].values),list(to_be_paused['Ad ID'].values))
    
    def pause_multiple_ads(self, account_id, df_data, verbose=False):
        self._client.SetClientCustomerId(account_id)
        ad_group_ad_service = self._client.GetService('AdGroupAdService', version=self._version)
        
        # construct operations
        operations = []
        for i, j in zip(df_data['Ad group ID'], df_data['Ad ID']):
            operations.append({'operator': 'SET', 
                               'operand': {'adGroupId': i,
                                           'ad': {'id': j},
                                           'status': 'PAUSED'}
                               })
        # start pausing
        for i in xrange(0, len(operations), self._max_operations):
            tmp_operations = operations[i:i+self._max_operations]
            result = ad_group_ad_service.mutate(tmp_operations)
            if verbose:
                for ad in result['value']:
                    print ('Ad with id "{0}" and type "{1}" was paused.'.format(ad['ad']['id'], ad['ad']['Ad.Type']))

In [ ]: