In [15]:
import argparse
from googleads import adwords
import json
import pandas as pd
from __future__ import print_function
from datetime import datetime
import time
import os
import re
import sys
# Python 2 only: reload(sys) makes sys.setdefaultencoding callable again so the
# implicit str/unicode conversions in this notebook use UTF-8 instead of ASCII.
# Python 3 has no builtin reload() and no sys.setdefaultencoding, so the hack
# is skipped there rather than raising at import time.
if sys.version_info[0] < 3:
    reload(sys)
    sys.setdefaultencoding('UTF8')

In [11]:
class keyword_planner(object):
    """Fetch AdWords search volumes for a file of keywords and save them as CSV.

    Keywords are read from the first column of a tab-separated file, sent to
    the AdWords ``TargetingIdeaService`` in batches, and every keyword whose
    search volume is above zero is written to ``output_name``.

    Language names and country codes are resolved through two lookup files in
    the current working directory: ``./languagecodes.json`` (language name ->
    language code) and ``./countries.csv`` (columns ``Country Code``,
    ``Name`` and ``Criteria ID``).

    Args:
        country_code: Country code looked up (upper-cased) in ``countries.csv``.
        language: Language name looked up in ``languagecodes.json``.
        keyword_file: Tab-separated file; the first column holds the keywords.
        output_name: Path of the CSV to write. Defaults to
            ``result_<current timestamp>.txt``.
        auth_file: Credentials file handed to
            ``adwords.AdWordsClient.LoadFromStorage``.
        sleep_duration: Seconds to wait after a failed request.
        max_number_of_keywords: Number of keywords sent per request.
    """

    _LANGUAGE_FILE = './languagecodes.json'
    _COUNTRY_FILE = './countries.csv'

    def __init__(self,
                 country_code='US',
                 language='English',
                 keyword_file='sample_keywords.txt',
                 output_name=None,
                 auth_file='~/auth.yaml',
                 sleep_duration=60,
                 max_number_of_keywords=700):
        self._client = adwords.AdWordsClient.LoadFromStorage(auth_file)
        self._country_code = country_code
        self._language = language
        self._keyword_file = keyword_file
        self._output_name=output_name or 'result_'+str(datetime.now()).replace(' ','_')+'.txt'
        self._sleep_duration = sleep_duration
        self._max_number_of_keywords = max_number_of_keywords

    @staticmethod
    def _load_languages():
        """Return the language-name -> language-code mapping from the JSON file."""
        with open(keyword_planner._LANGUAGE_FILE, 'r') as readfile:
            return json.load(readfile)

    @staticmethod
    def _load_countries():
        """Return the countries table indexed by its ``Country Code`` column."""
        countries = pd.read_csv(keyword_planner._COUNTRY_FILE)
        # Namibia's code "NA" is read as NaN by pandas; turn it back into text.
        return countries.fillna('NA').set_index('Country Code')

    @staticmethod
    def display_languages():
        """Print every known language name with its code, sorted by name."""
        lang = keyword_planner._load_languages()
        for name in sorted(lang):
            print(name, lang[name])

    @staticmethod
    def display_countries():
        """Print every known country code with its name, sorted by code."""
        countries = keyword_planner._load_countries()['Name'].to_dict()
        for code in sorted(countries):
            print(code, countries[code])

    def get_language_code(self, language):
        """Return the code stored for ``language``.

        Raises:
            KeyError: if ``language`` is not a key of ``languagecodes.json``.
        """
        return self._load_languages()[language]

    def get_country_id(self, country_code):
        """Return the ``Criteria ID`` for ``country_code`` (case-insensitive).

        Raises:
            KeyError: if the upper-cased code is not listed in ``countries.csv``.
        """
        return self._load_countries()['Criteria ID'][country_code.upper()]

    def get_volume(self, keywords):
        """Return ``{keyword text: search volume}`` for one batch of keywords.

        Args:
            keywords: list of keywords; falsy items (``None``, ``''``) are
                dropped before the request is built.

        Returns:
            dict mapping each returned keyword text to its integer search
            volume. Empty when no usable keyword is left, in which case no
            request is sent at all.
        """
        # A list (not filter()) so len() below also works on Python 3.
        keywords = [keyword for keyword in keywords if keyword]
        if not keywords:
            # Nothing to ask for: skip the request instead of sending an empty query.
            return {}

        language_code = self.get_language_code(self._language)
        country_id = self.get_country_id(self._country_code)
        targeting_idea_service = self._client.GetService('TargetingIdeaService', version='v201609')

        selector = {
            'searchParameters': [
            {
                'xsi_type': 'RelatedToQuerySearchParameter',
                'queries': keywords,
            },
            {
                'xsi_type': 'LanguageSearchParameter',
                'languages': [{'id': str(language_code)}]
            },
            {
                'xsi_type': 'LocationSearchParameter',
                'locations': [{'id': country_id}]
            }
            ],
            'ideaType': 'KEYWORD',
            'requestType': 'STATS',
            'requestedAttributeTypes': ['KEYWORD_TEXT', 'SEARCH_VOLUME'],
            'paging': {
                'startIndex': '0',
                'numberResults': str(len(keywords))
            }
        }

        page = targeting_idea_service.get(selector)
        return self._parse_page(page)

    @staticmethod
    def _parse_page(page):
        """Turn a service response page into ``{keyword text: search volume}``."""
        volumes = {}
        try:
            entries = list(page['entries'] or [])
        except Exception as exc:
            # Best effort: a page without usable 'entries' yields no results.
            print(exc.__doc__)
            print(exc)
            return volumes

        for entry in entries:
            # Parse each entry on its own so one malformed entry does not
            # discard the entries that follow it.
            try:
                attributes = {}
                for attribute in entry['data']:
                    # Attributes without a 'value' count as a volume of '0'.
                    attributes[attribute['key']] = getattr(attribute['value'], 'value', '0')
                volumes[attributes['KEYWORD_TEXT']] = int(attributes['SEARCH_VOLUME'])
            except Exception as exc:
                print(exc.__doc__)
                print(exc)
        return volumes

    def get_estimate(self, data):
        """Query volumes for ``data`` in batches, retrying each failed batch once.

        Args:
            data: list of queries.

        Returns:
            ``(final, error)``: ``final`` maps keyword -> volume for every
            batch that succeeded; ``error`` lists the keywords of every batch
            that failed on both attempts.
        """
        final = {}
        error = []
        batch_size = self._max_number_of_keywords
        for start in range(0, len(data), batch_size):
            batch = data[start:start + batch_size]
            try:
                volumes = self.get_volume(batch)
            except Exception as first_exc:
                print(first_exc)
                time.sleep(self._sleep_duration)
                try:
                    volumes = self.get_volume(batch)
                except Exception as second_exc:
                    print(second_exc)
                    error.extend(batch)
                    volumes = {}
                    # Back off again before moving on to the next batch.
                    time.sleep(self._sleep_duration)
            final.update(volumes)
        return final, error

    def get_estimate_recursive(self, data, max_retries=5):
        """Query volumes for ``data`` and re-run the batches that failed.

        Args:
            data: list of queries.
            max_retries: how many extra passes to make over the keywords that
                are still failing. ``None`` retries until nothing fails,
                which never returns if a batch fails permanently.

        Returns:
            dict mapping keyword -> volume for everything that could be
            fetched. Keywords still failing after ``max_retries`` passes are
            reported on stdout and left out.
        """
        result_orig, error = self.get_estimate(data)
        retries = 0
        while error and (max_retries is None or retries < max_retries):
            result, error = self.get_estimate(error)
            result_orig.update(result)
            retries += 1
        if error:
            print('Giving up on {0} keyword(s) after {1} retries'.format(len(error), retries))
        return result_orig

    def process_result(self, result):
        """Write the keywords of ``result`` with a volume above zero to the output CSV."""
        # list(...) because dict.items() is a view, not a list, on Python 3.
        df = pd.DataFrame(list(result.items()), columns=['keyword', 'volume'])
        # Filter out zero-volume keywords.
        df = df[df.volume>0]
        df.to_csv(self._output_name, index=False)

    def get_data(self):
        """Read the keyword file as a headerless, tab-separated DataFrame.

        Malformed lines are skipped with a warning instead of aborting the read.
        """
        try:
            return pd.read_csv(self._keyword_file, sep='\t', header=None, on_bad_lines='warn')
        except TypeError:
            # Older pandas does not know on_bad_lines; use the keywords it had instead.
            return pd.read_csv(self._keyword_file, sep='\t', header=None,
                               warn_bad_lines=True, error_bad_lines=False)

    def run(self):
        """Read the keywords, fetch their volumes and save the result CSV."""
        data = self.get_data()
        # Cells pandas parsed as missing (NaN) are not keywords; drop them.
        queries = data[0].dropna().values.tolist()
        print('Number of keywords to process: ', len(queries))
        result = self.get_estimate_recursive(queries)
        self.process_result(result)
        print('Done! Output saved as {0}'.format(self._output_name))

In [12]:
# Build a planner with every constructor default (US / English,
# 'sample_keywords.txt', credentials loaded from '~/auth.yaml').
a = keyword_planner()

In [136]:
# Read the keyword file, fetch the search volumes and write the result CSV.
a.run()

In [88]:
def display_params(an):
    """Print all instance attributes of ``an`` on one line as ``name: value`` pairs.

    The pairs are separated by ``', '`` and appear in the order ``vars(an)``
    yields them.
    """
    rendered = []
    for name, value in vars(an).items():
        rendered.append("%s: %s" % (name, value))
    print(', '.join(rendered))

In [89]:
# Show the planner's current settings (its instance attributes).
display_params(a)

In [9]: