In [ ]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup


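# Full state name -> USPS abbreviation; getURL() below uses both forms to build the Weatherbase state URL.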
states = {
        'Illinois':'IL',
        'Kansas':'KS',
        'South Dakota':'SD',
        'Idaho':'ID',
        'South Carolina':'SC',
        'Ohio':'OH',
        'Wyoming':'WY',
        'District of Columbia':'DC',
        'Alaska':'AK',
        'Rhode Island':'RI',
        'Texas':'TX',
        'Maryland':'MD',
        'Minnesota':'MN',
        'New Mexico':'NM',
        'Nevada':'NV',
        'Iowa':'IA',
        'West Virginia':'WV',
        'North Dakota':'ND',
        'Arkansas':'AR',
        'Arizona':'AZ',
        'Louisiana':'LA',
        'Delaware':'DE',
        'Florida':'FL',
        'Montana':'MT',
        'Missouri':'MO',
        'North Carolina':'NC',
        'Oklahoma':'OK',
        'Nebraska':'NE',
        'California':'CA',
        'Mississippi':'MS',
        'Wisconsin':'WI',
        'Indiana':'IN',
        'Georgia':'GA',
        'Massachusetts':'MA',
        'Tennessee':'TN',
        'New Hampshire':'NH',
        'Washington':'WA',
        'New Jersey':'NJ',
        'Connecticut':'CT',
        'Maine':'ME',
        'Oregon':'OR',
        'Vermont':'VT',
        'New York':'NY',
        'Alabama':'AL',
        'Hawaii':'HI',
        'Michigan':'MI',
        'Pennsylvania':'PA',
        'Virginia':'VA',
        'Utah':'UT',
        'Kentucky':'KY',
        'Colorado':'CO'
}
def getURL(state):
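    """Build the Weatherbase city-index URL for a US state name."""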
    state_abbr = states[state]
    state = state.replace(" ", "-")
    url = ('http://www.weatherbase.com/weather/city.php3?c=US&s=' + state_abbr +
           '&statename=' + state + '-United-States-of-America')
    return url

def getCitiesURL(cities):
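    """Map each city name to its absolute Weatherbase URL, given the <li> tags of a state's city index."""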
    cityURL = {}
    for city in cities:
        if city.a is None:  # skip any list item that has no city link
            continue
        url = 'http://www.weatherbase.com' + city.a.get('href')
        cityname = city.text
        cityURL[cityname] = url
    return cityURL

def getClimate(cities, state):
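    """Scrape each city's annual average high, average low, and precipitation
    from Weatherbase and append one CSV row per city to climateTable.csv."""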
    
    for city in cities:
        temp_high = ''
        temp_high_f = False
        temp_low = ''
        temp_low_f = False
        precip = ''
        precip_f = False

        url = cities[city]
        handle = requests.get(url)
        soup = BeautifulSoup(handle.text, 'html.parser')
        div = soup.find(attrs={'class': 'p402_premium'})
        if div is None:
            # Page layout differs or the city has no climate tables; skip it.
            print('-'*5 + city + ', ' + state + ': no climate data' + '-'*5)
            continue
        tables = div.find_all('table')

        print('-'*5 + city + ', ' + state + '-'*5)
        for table in tables:
            label = table.find('td').text
            # A table whose first cell is one of these headings is a caption
            # row; the matching annual value lives in the table that follows.
            if label == 'Average Precipitation' and not precip_f:
                print('\tPrecipitation Found')
                precip_f = True
                continue
            if label == 'Average High Temperature' and not temp_high_f:
                print('\tHigh Temperature Found')
                temp_high_f = True
                continue
            if label == 'Average Low Temperature' and not temp_low_f:
                print('\tLow Temperature Found')
                temp_low_f = True
                continue
            if not (precip_f or temp_high_f or temp_low_f):
                continue
            # Annual value: the "data" cell of the white-background row.
            val = table.find('tr', attrs={'bgcolor': 'white'}).find('td', attrs={'class': 'data'}).text
            if precip_f:
                precip = val
                precip_f = False
            if temp_high_f:
                temp_high = val
                temp_high_f = False
            if temp_low_f:
                temp_low = val
                temp_low_f = False

        city_output = city + ',' + state + ',' + temp_high + ',' + temp_low + ',' + precip + '\n'
        print(city_output)
        # Append one row per city; a city name containing a comma would need
        # quoting (e.g. via the csv module) to keep the file well-formed.
        with open('climateTable.csv', 'a') as fd:
            fd.write(city_output)

def checkState(state, file):
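    """Return True if `state` already appears in the state column (index 1) of the header-less CSV."""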
    return state in file[1].values

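# Driver: for every state, fetch its Weatherbase city index, collect the city
# URLs, and append each city's annual climate figures to climateTable.csv.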
for state in states.keys():
    # Re-read the CSV on every pass so an interrupted run can resume where it stopped.
    if os.path.exists('climateTable.csv'):
        file = pd.read_csv('climateTable.csv', header=None)
        if checkState(state, file):
            print('Found ' + state + ', ...Skipping...')
            continue
    url = getURL(state)
    handle = requests.get(url)
    soup = BeautifulSoup(handle.text, 'html.parser')
    city_list = soup.find(id="row-nohover").find_all('li')
    cities = getCitiesURL(city_list)
    getClimate(cities, state)

In [ ]:
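# A minimal follow-up sketch (not part of the scrape above): load the finished
# CSV for a quick sanity check. The column names below are chosen here for
# readability; the file itself is written without a header row, and this cell
# assumes the scraping loop has already produced climateTable.csv.
climate = pd.read_csv(
    'climateTable.csv',
    header=None,
    names=['city', 'state', 'avg_high', 'avg_low', 'avg_precip'],
)
print(climate.head())
print(climate['state'].value_counts().head())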