In [ ]:
# Property values by zip code (Zillow and Trulia) and business pattern data (US census)

In [3]:
from random import randint
import pandas as pd
from pandas import DataFrame
import requests
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import folium
import csv
from datetime import datetime   # one part of the main module

In [99]:
"""
    Zillow does not provide history data: can't use it
"""
# zwsid = 'X1-ZWz1ewzc1aj37v_65xu9'
# state = 'WA'
# city = 'Seattle'
# neighborhood = 'Ballard'

# #query_url = 'http://www.zillow.com/webservice/GetDemographics.htm?zws-id=%s&state=%s&city=%s&neighborhood=%s' % (zwsid,state,city,neighborhood)
# query_url = 'http://www.zillow.com/webservice/GetDeepComps.htm?zws-id=%s&zpid=48749425&count=5' % (zwsid) 
# r = requests.get(query_url)

In [ ]:
"""
    So we're going to use Trulia.
"""
trulia_api_key = ''
query_url = 'http://api.trulia.com/webservices.php?library=TruliaStats&function=getCityStats&city=New York&state=NY&startDate=2007-02-10&endDate=2009-02-07&apikey=%s' % (trulia_api_key)
r = requests.get(query_url)

1. Get zip code from wikipedia


In [20]:
""" GET SF ZIP CODES from http://www.city-data.com/zipmaps/San-Francisco-California.html    
"""
import itertools
sf_zip_codes = [94102, 94103, 94104, 94105, 94107, 94108, 94109, 94110, 94111, 94112, 94114, 94115, 94116, 94117, 94118, 94121, 94122, 94123, 94124, 94127, 94129, 94131, 94132, 94133, 94134, 94158]

2. Convert zip code to coordinates


In [364]:
""" Geopy has zip code converter! """
from geopy.geocoders import Nominatim
geolocator = Nominatim()
location = geolocator.geocode("78704")
print 'EXAMPLE:'
print(location.address)
print((location.latitude, location.longitude))


EXAMPLE:
Austin, Travis County, Texas, 78704, United States of America
(30.2447182141636, -97.7634808020997)

In [369]:
"""
    But something is wrong.
"""
location = geolocator.geocode(sf_zip_codes[0])
print 'EXAMPLE:'
print(location.address)
print((location.latitude, location.longitude))


EXAMPLE:
Mogiliovas, Klaipėda, Klaipėdos miesto savivaldybė, Klaipėdos apskritis, 94102, Lietuva
(55.6898967634684, 21.2091635778727)

In [4]:
"""
    So we're using Google Geocode API.
"""
GOOGLE_KEY = ''
query_url = 'https://maps.googleapis.com/maps/api/geocode/json?address=94102&key=%s' % (GOOGLE_KEY)
r = requests.get(query_url)

In [375]:
r.json()


Out[375]:
{u'results': [{u'address_components': [{u'long_name': u'94102',
     u'short_name': u'94102',
     u'types': [u'postal_code']},
    {u'long_name': u'San Francisco',
     u'short_name': u'SF',
     u'types': [u'locality', u'political']},
    {u'long_name': u'San Francisco County',
     u'short_name': u'San Francisco County',
     u'types': [u'administrative_area_level_2', u'political']},
    {u'long_name': u'California',
     u'short_name': u'CA',
     u'types': [u'administrative_area_level_1', u'political']},
    {u'long_name': u'United States',
     u'short_name': u'US',
     u'types': [u'country', u'political']}],
   u'formatted_address': u'San Francisco, CA 94102, USA',
   u'geometry': {u'bounds': {u'northeast': {u'lat': 37.789226,
      u'lng': -122.4034491},
     u'southwest': {u'lat': 37.7694409, u'lng': -122.429849}},
    u'location': {u'lat': 37.7786871, u'lng': -122.4212424},
    u'location_type': u'APPROXIMATE',
    u'viewport': {u'northeast': {u'lat': 37.789226, u'lng': -122.4034491},
     u'southwest': {u'lat': 37.7694409, u'lng': -122.429849}}},
   u'place_id': u'ChIJs88qnZmAhYARk8u-7t1Sc2g',
   u'types': [u'postal_code']}],
 u'status': u'OK'}

In [399]:
"""
    Get coordinates.
"""
temp = r.json()
temp_ = temp['results'][0]['geometry']['location']
temp_


Out[399]:
{u'lat': 37.7786871, u'lng': -122.4212424}

In [22]:
lats = []
lngs = []
for sf_zip_code in sf_zip_codes:
    query_url = 'https://maps.googleapis.com/maps/api/geocode/json?address=%s&key=%s' % (str(sf_zip_code),GOOGLE_KEY)
    r = requests.get(query_url)    
    temp = r.json()
    lat = temp['results'][0]['geometry']['location']['lat']
    lng = temp['results'][0]['geometry']['location']['lng']
    lats.append(lat)
    lngs.append(lng)

3. Sanity check: map visualization


In [24]:
import folium
m = folium.Map(location=[37.7786871, -122.4212424],zoom_start=13)
m.circle_marker(location=[37.7786871, -122.4212424],radius=100)
for i in range(len(sf_zip_codes)):
     m.circle_marker(location=[lats[i], lngs[i]], radius=500, #100 seems good enough for now
                    popup=str(sf_zip_codes[i]), line_color = "#980043",
                    fill_color="#980043", fill_opacity=.2)
m.create_map(path='sf_zip_code_map.html')

4. Get bussiness type and # of establishments per year from US census

Check US census for the data. It can be downloaded as csv format.


In [51]:
# business type
df = pd.read_csv('zbp13detail.txt')
df.head()


Out[51]:
zip naics est n1_4 n5_9 n10_19 n20_49 n50_99 n100_249 n250_499 n500_999 n1000
0 501 ------ 2 2 0 0 0 0 0 0 0 0
1 501 81---- 2 2 0 0 0 0 0 0 0 0
2 501 813110 2 2 0 0 0 0 0 0 0 0
3 1001 ------ 439 199 68 75 52 28 14 2 0 1
4 1001 22---- 2 1 0 0 1 0 0 0 0 0

In [37]:
sf_zip_codes = [94102, 94103, 94104, 94105, 94107, 94108, 94109, 94110, 94111, 94112, 94114, 94115, 94116, 94117, 94118, 94121, 94122, 94123, 94124, 94127, 94129, 94131, 94132, 94133, 94134, 94158]
oak_zip_codes = [94601, 94602, 94603, 94605, 94606, 94607, 94610, 94611, 94612, 94613, 94621]
bay_zip_codes = sf_zip_codes + oak_zip_codes

# save zipcode file
import csv
myfile = open('bay_zip_codes.csv', 'wb')
wr = csv.writer(myfile)
wr.writerow(bay_zip_codes)

In [48]:
# load zipcode file
with open('bay_zip_codes.csv', 'rb') as f:
    reader = csv.reader(f)
    bay_zip_codes = list(reader)[0]
# convert str list to int list
bay_zip_codes = map(int, bay_zip_codes)

In [50]:
df_sf_oak = df.loc[df['zip'].isin(bay_zip_codes)]

In [52]:
# save as a file
df_sf_oak.to_csv('ZCBT_sf_oak_2013.csv',encoding='utf-8',index=False)
# sf1.sort(columns='est',ascending=False)

In [55]:
df_sf_oak.tail()


Out[55]:
zip naics est n1_4 n5_9 n10_19 n20_49 n50_99 n100_249 n250_499 n500_999 n1000
2972618 94621 813410 3 0 0 2 0 0 1 0 0 0
2972619 94621 813910 2 2 0 0 0 0 0 0 0 0
2972620 94621 813920 2 1 0 1 0 0 0 0 0 0
2972621 94621 813930 26 11 6 5 2 0 2 0 0 0
2972622 94621 99---- 1 1 0 0 0 0 0 0 0 0

In [431]:
# let's compare to EPA
epa = b.loc[b['zip'] == 94303]
epa.sort(columns='est',ascending=False)


Out[431]:
zip naics est n1_4 n5_9 n10_19 n20_49 n50_99 n100_249 n250_499 n500_999 n1000
2951410 94303 ------ 835 455 140 109 73 29 22 5 1 1
2951551 94303 54---- 219 143 27 23 13 8 4 1 0 0
2951606 94303 62---- 80 36 19 12 9 1 3 0 0 0
2951638 94303 81---- 71 39 16 9 6 1 0 0 0 0
2951539 94303 53---- 60 47 5 4 3 0 1 0 0 0
2951478 94303 44---- 56 19 14 12 4 3 3 1 0 0
2951414 94303 23---- 53 29 8 7 7 2 0 0 0 0
2951594 94303 61---- 44 17 6 7 7 4 2 1 0 0
2951562 94303 541511 40 32 4 0 1 1 2 0 0 0
2951582 94303 56---- 40 22 6 6 1 2 3 0 0 0
2951630 94303 72---- 39 12 8 14 2 1 0 2 0 0
2951526 94303 52---- 39 18 8 6 4 1 2 0 0 0
2951514 94303 51---- 32 17 5 2 5 2 1 0 0 0
2951456 94303 42---- 31 18 5 2 5 1 0 0 0 0
2951434 94303 31---- 30 14 6 1 4 2 2 0 0 1
2951552 94303 541110 30 16 4 3 4 2 1 0 0 0
2951624 94303 624410 24 13 5 4 1 0 1 0 0 0
2951571 94303 541712 19 8 3 4 1 2 1 0 0 0
2951566 94303 541611 18 16 0 1 1 0 0 0 0 0
2951636 94303 722513 18 3 7 6 1 0 0 1 0 0
2951563 94303 541512 17 11 2 3 1 0 0 0 0 0
2951543 94303 531210 16 14 0 1 1 0 0 0 0 0
2951569 94303 541690 15 10 3 1 1 0 0 0 0 0
2951540 94303 531110 15 13 0 1 1 0 0 0 0 0
2951651 94303 813110 15 8 3 2 1 1 0 0 0 0
2951625 94303 71---- 15 9 3 1 1 0 1 0 0 0
2951505 94303 48---- 15 9 2 2 1 1 0 0 0 0
2951595 94303 611110 14 1 1 4 3 4 1 0 0 0
2951591 94303 561730 13 9 2 2 0 0 0 0 0 0
2951557 94303 541330 12 5 2 3 1 0 0 1 0 0
... ... ... ... ... ... ... ... ... ... ... ... ...
2951492 94303 446130 1 1 0 0 0 0 0 0 0 0
2951599 94303 611512 1 0 0 1 0 0 0 0 0 0
2951477 94303 425120 1 1 0 0 0 0 0 0 0 0
2951597 94303 611420 1 1 0 0 0 0 0 0 0 0
2951423 94303 237310 1 1 0 0 0 0 0 0 0 0
2951592 94303 561740 1 0 1 0 0 0 0 0 0 0
2951424 94303 237990 1 1 0 0 0 0 0 0 0 0
2951473 94303 424330 1 1 0 0 0 0 0 0 0 0
2951589 94303 561710 1 0 0 1 0 0 0 0 0 0
2951588 94303 561622 1 1 0 0 0 0 0 0 0 0
2951587 94303 561499 1 1 0 0 0 0 0 0 0 0
2951508 94303 484220 1 0 1 0 0 0 0 0 0 0
2951583 94303 561110 1 1 0 0 0 0 0 0 0 0
2951580 94303 551112 1 0 0 1 0 0 0 0 0 0
2951476 94303 425110 1 1 0 0 0 0 0 0 0 0
2951575 94303 541850 1 1 0 0 0 0 0 0 0 0
2951555 94303 541219 1 1 0 0 0 0 0 0 0 0
2951574 94303 541840 1 1 0 0 0 0 0 0 0 0
2951441 94303 333314 1 0 1 0 0 0 0 0 0 0
2951426 94303 238150 1 0 0 1 0 0 0 0 0 0
2951480 94303 441120 1 0 0 1 0 0 0 0 0 0
2951482 94303 441320 1 1 0 0 0 0 0 0 0 0
2951483 94303 442110 1 0 0 0 0 0 0 1 0 0
2951484 94303 442210 1 1 0 0 0 0 0 0 0 0
2951564 94303 541513 1 1 0 0 0 0 0 0 0 0
2951486 94303 444110 1 0 0 0 0 0 1 0 0 0
2951558 94303 541350 1 1 0 0 0 0 0 0 0 0
2951431 94303 238340 1 0 0 0 0 1 0 0 0 0
2951491 94303 445299 1 1 0 0 0 0 0 0 0 0
2951443 94303 334111 1 1 0 0 0 0 0 0 0 0

250 rows × 12 columns

3. Collect property values per zip code over time


In [353]:
import trulia.stats as trustat
import trulia.location as truloc
zip_code_stats = trulia.stats.TruliaStats(TRULIA_KEY).get_zip_code_stats(zip_code='90025', start_date='2014-01-01', end_date='2014-01-31')

In [354]:
temp = zip_code_stats['listingStats']['listingStat']
df = DataFrame(temp)
df.head()


Out[354]:
listingPrice weekEndingDate
0 {u'subcategory': [{u'type': u'All Properties',... 2014-01-04
1 {u'subcategory': [{u'type': u'All Properties',... 2014-01-04
2 {u'subcategory': [{u'type': u'All Properties',... 2014-01-11
3 {u'subcategory': [{u'type': u'All Properties',... 2014-01-18
4 {u'subcategory': [{u'type': u'All Properties',... 2014-01-18

In [355]:
def func(x,key):
    k = x['subcategory'][0][key] # here I read key values 
    return pd.Series(k)

In [356]:
df['numProperties']=df['listingPrice'].apply((lambda x: func(x,'numberOfProperties')))
df['medPrice']=df['listingPrice'].apply((lambda x: func(x,'medianListingPrice')))
df['avrPrice']=df['listingPrice'].apply((lambda x: func(x,'averageListingPrice')))
df = df.drop('listingPrice',1)
df.head()


Out[356]:
weekEndingDate numProperties medPrice avrPrice
0 2014-01-04 58 738667 1138490
1 2014-01-04 68 737000 1061787
2 2014-01-11 54 738643 1149556
3 2014-01-18 55 739333 1132056
4 2014-01-18 58 744500 1140457

Neighborhood boundaries in SF


In [357]:
"""
    Get neighborhoods
"""
neighborhoods = trulia.location.LocationInfo(TRULIA_KEY).get_neighborhoods_in_city('San Francisco', 'CA')

In [358]:
neighborhoods


Out[358]:
[OrderedDict([(u'id', u'1386'), (u'name', u'Alamo Square')]),
 OrderedDict([(u'id', u'1387'), (u'name', u'Anza Vista')]),
 OrderedDict([(u'id', u'1388'), (u'name', u'Ashbury Heights')]),
 OrderedDict([(u'id', u'1389'), (u'name', u'Balboa Terrace')]),
 OrderedDict([(u'id', u'1390'), (u'name', u'Bayview')]),
 OrderedDict([(u'id', u'1391'), (u'name', u'Bernal Heights')]),
 OrderedDict([(u'id', u'1392'), (u'name', u"Bernt's Knoll")]),
 OrderedDict([(u'id', u'1393'), (u'name', u'Buena Vista Park')]),
 OrderedDict([(u'id', u'1394'), (u'name', u'Castro')]),
 OrderedDict([(u'id', u'1395'), (u'name', u'Central Richmond')]),
 OrderedDict([(u'id', u'1396'), (u'name', u'Central Sunset')]),
 OrderedDict([(u'id', u'1397'), (u'name', u'Central Waterfront')]),
 OrderedDict([(u'id', u'1398'), (u'name', u'Chinatown')]),
 OrderedDict([(u'id', u'1399'), (u'name', u'Civic Center')]),
 OrderedDict([(u'id', u'1400'), (u'name', u'Cole Valley')]),
 OrderedDict([(u'id', u'1401'), (u'name', u'Cow Hollow')]),
 OrderedDict([(u'id', u'1402'), (u'name', u'Crocker Amazon')]),
 OrderedDict([(u'id', u'1403'), (u'name', u'Diamond Heights')]),
 OrderedDict([(u'id', u'1404'), (u'name', u'Downtown')]),
 OrderedDict([(u'id', u'1405'), (u'name', u'Duboce Triangle')]),
 OrderedDict([(u'id', u'1406'), (u'name', u'Excelsior')]),
 OrderedDict([(u'id', u'1407'), (u'name', u'Financial District')]),
 OrderedDict([(u'id', u'1408'), (u'name', u"Fisherman's Wharf")]),
 OrderedDict([(u'id', u'1409'), (u'name', u'Forest Hill')]),
 OrderedDict([(u'id', u'1410'), (u'name', u'Forest Hill Extension')]),
 OrderedDict([(u'id', u'1411'), (u'name', u'Forest Knolls')]),
 OrderedDict([(u'id', u'1412'), (u'name', u'Fort Mason')]),
 OrderedDict([(u'id', u'1413'), (u'name', u'Glen Park')]),
 OrderedDict([(u'id', u'1414'), (u'name', u'Golden Gate Heights')]),
 OrderedDict([(u'id', u'1415'), (u'name', u'Golden Gate Park')]),
 OrderedDict([(u'id', u'1416'), (u'name', u'Haight')]),
 OrderedDict([(u'id', u'1417'), (u'name', u'Hayes Valley')]),
 OrderedDict([(u'id', u'1418'), (u'name', u'Hunters Point')]),
 OrderedDict([(u'id', u'1419'), (u'name', u'Ingleside')]),
 OrderedDict([(u'id', u'1420'), (u'name', u'Ingleside Heights')]),
 OrderedDict([(u'id', u'1421'), (u'name', u'Ingleside Terrace')]),
 OrderedDict([(u'id', u'1422'), (u'name', u'Inner Parkside')]),
 OrderedDict([(u'id', u'1423'), (u'name', u'Inner Richmond')]),
 OrderedDict([(u'id', u'1424'), (u'name', u'Inner Sunset')]),
 OrderedDict([(u'id', u'1425'), (u'name', u'Japantown')]),
 OrderedDict([(u'id', u'1426'), (u'name', u'Lake Street')]),
 OrderedDict([(u'id', u'1427'), (u'name', u'Laurel Heights')]),
 OrderedDict([(u'id', u'1428'), (u'name', u'Lincoln Park - Lobos')]),
 OrderedDict([(u'id', u'1429'), (u'name', u'Lone Mountain')]),
 OrderedDict([(u'id', u'1430'), (u'name', u'Lower Pacific Heights')]),
 OrderedDict([(u'id', u'1431'), (u'name', u'Marina')]),
 OrderedDict([(u'id', u'1432'), (u'name', u'Merced Heights')]),
 OrderedDict([(u'id', u'1433'), (u'name', u'Merced Manor')]),
 OrderedDict([(u'id', u'1434'), (u'name', u'Midtown Terrace')]),
 OrderedDict([(u'id', u'1435'), (u'name', u'Miraloma')]),
 OrderedDict([(u'id', u'1436'), (u'name', u'Mission')]),
 OrderedDict([(u'id', u'1437'), (u'name', u'Mission Bay')]),
 OrderedDict([(u'id', u'1438'), (u'name', u'Mission Dolores')]),
 OrderedDict([(u'id', u'1439'), (u'name', u'Mission Terrace')]),
 OrderedDict([(u'id', u'1440'), (u'name', u'Monterey Heights')]),
 OrderedDict([(u'id', u'1441'), (u'name', u'Mount Davidson Manor')]),
 OrderedDict([(u'id', u'1442'), (u'name', u'Nob Hill')]),
 OrderedDict([(u'id', u'1443'), (u'name', u'Noe Valley')]),
 OrderedDict([(u'id', u'1444'), (u'name', u'North Beach')]),
 OrderedDict([(u'id', u'1445'), (u'name', u'North Park')]),
 OrderedDict([(u'id', u'1446'), (u'name', u'Oceanview')]),
 OrderedDict([(u'id', u'1447'), (u'name', u'Outer Mission')]),
 OrderedDict([(u'id', u'1448'), (u'name', u'Outer Richmond')]),
 OrderedDict([(u'id', u'1449'), (u'name', u'Outer Sunset')]),
 OrderedDict([(u'id', u'1450'), (u'name', u'Pacific Heights')]),
 OrderedDict([(u'id', u'1451'), (u'name', u'Panhandle')]),
 OrderedDict([(u'id', u'1452'), (u'name', u'Portola')]),
 OrderedDict([(u'id', u'1453'), (u'name', u'Potrero Hill')]),
 OrderedDict([(u'id', u'1454'), (u'name', u'Presidio')]),
 OrderedDict([(u'id', u'1455'), (u'name', u'Presidio Heights')]),
 OrderedDict([(u'id', u'1456'), (u'name', u'Russian Hill')]),
 OrderedDict([(u'id', u'1457'), (u'name', u'Saint Francis Wood')]),
 OrderedDict([(u'id', u'1458'), (u'name', u'San Francisco State University')]),
 OrderedDict([(u'id', u'1459'), (u'name', u'Sea Cliff')]),
 OrderedDict([(u'id', u'1460'), (u'name', u'Sherwood Forest')]),
 OrderedDict([(u'id', u'1461'), (u'name', u'Silver Terrace')]),
 OrderedDict([(u'id', u'1462'), (u'name', u'SoMa')]),
 OrderedDict([(u'id', u'1463'), (u'name', u'South Beach')]),
 OrderedDict([(u'id', u'1464'), (u'name', u'Stonestown')]),
 OrderedDict([(u'id', u'1465'), (u'name', u'Stonestown Apartments')]),
 OrderedDict([(u'id', u'1466'), (u'name', u'Sunnyside')]),
 OrderedDict([(u'id', u'1467'), (u'name', u'Telegraph Hill')]),
 OrderedDict([(u'id', u'1468'), (u'name', u'Tenderloin')]),
 OrderedDict([(u'id', u'1469'), (u'name', u'Treasure Island')]),
 OrderedDict([(u'id', u'1470'), (u'name', u'Twin Peaks')]),
 OrderedDict([(u'id', u'1471'), (u'name', u'Union Square')]),
 OrderedDict([(u'id', u'1472'), (u'name', u'Visitacion Valley')]),
 OrderedDict([(u'id', u'1473'), (u'name', u'West Portal')]),
 OrderedDict([(u'id', u'1474'), (u'name', u'Western Addition')]),
 OrderedDict([(u'id', u'1475'), (u'name', u'Westwood Highlands')]),
 OrderedDict([(u'id', u'1476'), (u'name', u'Westwood Park')]),
 OrderedDict([(u'id', u'1477'), (u'name', u'Yerba Buena Island')])]

In [359]:
""" Trulia does not provide coordinates."""
Alamo_Square = neighborhoods[0]
Alamo_Square


Out[359]:
OrderedDict([(u'id', u'1386'), (u'name', u'Alamo Square')])

In [ ]:
neighborhood_stats = trustat.TruliaStats(TRULIA_KEY).get_neighborhood_stats(neighborhood_id=7183, start_date='2012-01-01', end_date='2012-06-30')

In [229]:
neighborhood_stats.keys()


[u'location', u'trafficStats', u'listingStats']

In [230]:
neighborhood_stats['listingStats'].keys()


Out[230]:
[u'listingStat']

In [236]:
a = neighborhood_stats['listingStats']['listingStat']
b = DataFrame(a)
b.head()


Out[236]:
listingPrice weekEndingDate
0 {u'subcategory': [{u'type': u'All Properties',... 2012-01-07
1 {u'subcategory': [{u'type': u'All Properties',... 2012-01-14
2 {u'subcategory': [{u'type': u'All Properties',... 2012-01-21
3 {u'subcategory': [{u'type': u'All Properties',... 2012-01-28
4 {u'subcategory': [{u'type': u'All Properties',... 2012-02-04

In [221]:
# Let's focus on All properties
x = b['listingPrice'][0]
x['subcategory'][0]


Out[221]:
OrderedDict([(u'type', u'All Properties'),
             (u'numberOfProperties', u'35'),
             (u'medianListingPrice', u'1745143'),
             (u'averageListingPrice', u'2072702')])

In [222]:
x['subcategory'][0]['type']


Out[222]:
u'All Properties'

In [237]:
b['numProperties']=b['listingPrice'].apply((lambda x: func(x,'numberOfProperties')))
b['medPrice']=b['listingPrice'].apply((lambda x: func(x,'medianListingPrice')))
b['avrPrice']=b['listingPrice'].apply((lambda x: func(x,'averageListingPrice')))

In [238]:
b.drop('listingPrice',1)


Out[238]:
weekEndingDate numProperties medPrice avrPrice
0 2012-01-07 2 784000 784000
1 2012-01-14 2 784143 784143
2 2012-01-21 2 784500 784500
3 2012-01-28 2 784500 784500
4 2012-02-04 2 779667 749444
5 2012-02-11 2 784500 784500
6 2012-02-11 3 770000 723000
7 2012-02-18 3 739786 712929
8 2012-02-25 2 699500 699500
9 2012-03-03 2 685286 685286
10 2012-03-10 1 799000 799000
11 2012-03-10 2 699500 699500
12 2012-03-17 1 784917 784917
13 2012-03-24 2 714500 714500
14 2012-03-31 2 714500 714500
15 2012-04-07 2 714500 714500
16 2012-04-14 2 714500 714500
17 2012-04-21 2 714500 714500
18 2012-04-28 2 714500 714500
19 2012-05-05 2 750714 750714
20 2012-05-12 2 774500 774500
21 2012-05-19 2 774500 774500
22 2012-05-26 2 770417 761839
23 2012-05-26 3 750000 698533
24 2012-06-02 3 753571 716809
25 2012-06-09 1 775000 775000
26 2012-06-09 2 762500 762500
27 2012-06-16 1 771429 771429
28 2012-06-23 1 750000 750000
29 2012-06-30 1 750000 750000
30 2012-06-30 2 750000 750000

In [259]:
matplotlib.dates.date2num(a)


Out[259]:
734509.0

In [262]:
date_list=[]
for date in b['weekEndingDate']:
    date_list.append(datetime.strptime(date,'%Y-%m-%d'))
#a = datetime.strptime(b['weekEndingDate'],'%Y-%m-%d')

In [271]:
# plot time vs. value
dates = matplotlib.dates.date2num(date_list)

fig, ax = plt.subplots()
ax.plot_date(dates, b.medPrice,'-')


Out[271]:
[<matplotlib.lines.Line2D at 0x10e8ecf50>]

In [ ]: