Requests is a simple HTTP library for Python, used here for wget/curl-style download operations.
In [1]:
import numpy as np
import pandas as pd
import requests
import os
In [2]:
# GET A CSV OF ALL STARBUCKS LOCATIONS
# If this link is ever broken, use the Socrata portal link above to get a new one.
# Download only once: skip the network call when the file is already on disk.
fname = 'All_Starbucks_Locations_in_the_World.csv'
if not os.path.isfile(fname):
    print('Getting file from Socrata portal')
    r = requests.get('https://opendata.socrata.com/api/views/xy4y-c4mk/rows.csv?accessType=DOWNLOAD')
    r.raise_for_status()  # fail loudly instead of silently caching an error page
    # Write decoded text through a context manager so the handle is always closed;
    # an explicit encoding avoids the py2-style bytes-into-text-file write.
    with open(fname, 'w', encoding='utf-8') as f:
        f.write(r.text)
df = pd.read_csv(fname)
In [3]:
# LET'S GET SOME SUMMARY STATISTICS BY COUNTRY
# Count stores per country, largest first. `DataFrame.sort` was removed from
# pandas; `sort_values` is the supported replacement. The chain below builds
# the frame in one pass with no in-place mutation, so re-running is safe.
by_country = (
    df.groupby('Country')['Store ID']
      .count()
      .sort_values(ascending=False)
      .to_frame('count')
)
# Share of the worldwide store total contributed by each country.
by_country['percentage'] = by_country['count'] / by_country['count'].sum()
by_country.head()
Out[3]:
In [4]:
# DRILL DOWN BY STATES
# Restrict to US stores, then count per state ('Country Subdivision'), largest
# first. Avoids shadowing the builtin `filter`, and replaces the removed
# `DataFrame.sort` with `sort_values`.
usa = df[df['Country'] == 'US'].copy()
by_state = (
    usa.groupby('Country Subdivision')['Store ID']
       .count()
       .sort_values(ascending=False)
       .to_frame('count')
)
# Share of the US store total contributed by each state.
by_state['percentage'] = by_state['count'] / by_state['count'].sum()
by_state.head()
Out[4]:
In [5]:
# FOCUS ON LOS ANGELES
# Build one boolean mask per condition, then AND them together. Descriptive
# names replace the original `filter`, which shadowed the Python builtin.
is_us = df['Country'] == 'US'
is_ca = df['Country Subdivision'] == 'CA'
is_la_city = df['City'] == 'Los Angeles'
# .copy() so later edits to `la` never touch (or warn about) the parent `df`.
la = df[is_us & is_ca & is_la_city].copy()
In [6]:
# HOW MANY ROWS AND COLUMNS?
# (rows, columns) tuple for the Los Angeles subset built above
la.shape
Out[6]:
In [7]:
# CAN YOU FIND YOUR FAVORITE?
# Show just the street-address columns for every LA store.
la.loc[:, ['Street 1', 'Street 2']]
Out[7]:
In [8]:
# Boolean mask over the LA stores: True where 'Ownership Type' is 'CO'.
co_series = la['Ownership Type'].eq('CO')
co_series.head()
Out[8]:
In [9]:
# Invert the mask: True now marks stores whose ownership type is NOT 'CO'.
(~co_series).head()
Out[9]:
In [10]:
# Materialize the boolean mask as a plain Python list of bools.
co_series.values.tolist()
Out[10]:
In [11]:
# Order the LA stores by ZIP code. `DataFrame.sort` was removed from pandas;
# `sort_values` is the supported replacement. Reassigning (rather than
# mutating in place) keeps the cell idempotent on re-run.
la = la.sort_values('Postal Code')
la.head()
Out[11]:
In [12]:
# The row labels inherited from `df` survive the filtering and sorting above,
# so this index is sparse and out of order — motivation for the reset below.
la.index
Out[12]:
In [13]:
# Replace the sparse inherited row labels with a simple 0..n-1 integer index.
la.index = np.arange(len(la))
la.index
Out[13]:
In [14]:
# Re-inspect the first rows now that the index runs 0..n-1.
la.head()
Out[14]:
In [15]:
# Drop the redundant 'Brand' column (every row is Starbucks) and give the
# store-ID column a snake_case name. Renaming by label instead of by position
# (`cols[0] = ...`) can't silently mislabel a column if column order changes.
la = (
    la.drop('Brand', axis=1)
      .rename(columns={'Store ID': 'store_id'})
)
la.head()
Out[15]: