In [1]:
import pandas as pd
import numpy as np
%matplotlib inline

In [2]:
# Show the kernel's current working directory (IPython automagic form of %pwd).
pwd


Out[2]:
u'/home/topo/repos/ghub/pynotebooks'

In [3]:
# Switch into the data checkout so the relative 'ch06/...' paths used by the
# following cells resolve.
# NOTE(review): this is a machine-specific absolute path; adjust it for your own checkout.
cd /home/topo/repos/ghub/pydata-book


/home/topo/repos/ghub/pydata-book

In [4]:
# header=None: treat the first line as data and let pandas number the columns.
pd.read_csv(
    'ch06/ex2.csv',
    header=None,
)


Out[4]:
0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

In [5]:
# Same file, but with explicit column labels supplied instead of integer ones.
pd.read_csv(
    'ch06/ex2.csv',
    names=['a', 'b', 'c', 'd', 'message'],
)


Out[5]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

In [6]:
# Column labels reused by the next few read_csv calls.
names = list('abcd') + ['message']

In [7]:
# Promote the 'message' column to the row index while loading.
pd.read_csv(
    'ch06/ex2.csv',
    names=names,
    index_col='message',
)


Out[7]:
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12

In [8]:
# Any named column can serve as the index; here the numeric column 'a' is used.
pd.read_csv(
    'ch06/ex2.csv',
    names=names,
    index_col='a',
)


Out[8]:
b c d message
a
1 2 3 4 hello
5 6 7 8 world
9 10 11 12 foo

In [9]:
# Print the raw file via the shell: two key columns followed by two value columns.
!cat ch06/csv_mindex.csv


key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16

In [10]:
# Passing a list to index_col builds a two-level (hierarchical) row index.
parsed = pd.read_csv(
    'ch06/csv_mindex.csv',
    index_col=['key1', 'key2'],
)

In [11]:
# Display the frame loaded from csv_mindex.csv, indexed by (key1, key2).
parsed


Out[11]:
value1 value2
key1 key2
one a 1 2
b 3 4
c 5 6
d 7 8
two a 9 10
b 11 12
c 13 14
d 15 16

In [12]:
# Peek at the raw lines of the whitespace-delimited file.
# The `with` block closes the file handle as soon as the lines are read,
# instead of leaving it open until garbage collection.
with open('ch06/ex3.txt') as f:
    ex3_lines = list(f)
ex3_lines


Out[12]:
['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']

In [13]:
# Fields are separated by a variable amount of whitespace, so use a regex separator.
# The pattern is a raw string: '\s' in a plain string literal is an invalid
# escape sequence and triggers a warning on newer Python versions.
pd.read_table('ch06/ex3.txt', sep=r'\s+')


Out[13]:
A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491

In [14]:
# Print the raw file via the shell; note the '#' commentary lines mixed in with the data.
!cat ch06/ex4.csv


# hey!
a,b,c,d,message
# just wanted to make things more difficult for you
# who reads CSV files with computers, anyway?
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [15]:
# Naive load: nothing is skipped, so the '#' commentary lines are parsed as
# data and the resulting frame is garbled.
pd.read_table(
    'ch06/ex4.csv',
    sep=',',
)


Out[15]:
# hey!
a b c d message
# just wanted to make things more difficult for you NaN NaN NaN NaN
# who reads CSV files with computers anyway? NaN NaN NaN
1 2 3 4 hello
5 6 7 8 world
9 10 11 12 foo

In [16]:
# Drop file lines 0, 2 and 3 (the '#' commentary lines) before parsing.
pd.read_table(
    'ch06/ex4.csv',
    sep=',',
    skiprows=[0, 2, 3],
)


Out[16]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

In [17]:
# Print the raw file via the shell; it contains a literal 'NA' and an empty field.
!cat ch06/ex5.csv


something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo

In [20]:
# Default parsing: both the literal 'NA' and the empty field load as NaN.
res = pd.read_csv(filepath_or_buffer='ch06/ex5.csv')
res


Out[20]:
something a b c d message
0 one 1 2 3 4 NaN
1 two 5 6 NaN 8 world
2 three 9 10 11 12 foo

In [21]:
# Boolean mask marking which cells of `res` are missing.
res.isnull()


Out[21]:
something a b c d message
0 False False False False False True
1 False False False True False False
2 False False False False False False

In [25]:
# Explicitly list 'NA' as a missing-value marker; the frame matches the
# default load above.
result = pd.read_csv(
    'ch06/ex5.csv',
    na_values=['NA'],
)
result


Out[25]:
something a b c d message
0 one 1 2 3 4 NaN
1 two 5 6 NaN 8 world
2 three 9 10 11 12 foo

In [26]:
# Per-column missing-value markers: column name -> strings to treat as NA.
sentinels = {
    'message': ['foo', 'NA'],
    'something': ['two'],
}
sentinels


Out[26]:
{'message': ['foo', 'NA'], 'something': ['two']}

In [27]:
# Apply the per-column NA markers from `sentinels` while loading.
pd.read_csv(
    'ch06/ex5.csv',
    na_values=sentinels,
)


Out[27]:
something a b c d message
0 one 1 2 3 4 NaN
1 NaN 5 6 NaN 8 world
2 three 9 10 11 12 NaN

In [28]:
# Load the ex6 file in full. Note: this rebinds `result`, which previously
# held the ex5 frame.
result = pd.read_csv(filepath_or_buffer='ch06/ex6.csv')

In [30]:
# Preview only the first five rows rather than dumping the whole frame.
result.head(n=5)


Out[30]:
one two three four key
0 0.467976 -0.038649 -0.295344 -1.824726 L
1 -0.358893 1.404453 0.704965 -0.200638 B
2 -0.501840 0.659254 -0.421691 -0.057688 G
3 0.204886 1.074134 1.388361 -0.982404 R
4 0.354628 -0.133116 0.283763 -0.837063 Q

In [32]:
# nrows caps how many data rows are read from the file.
pd.read_csv(
    'ch06/ex6.csv',
    nrows=6,
)


Out[32]:
one two three four key
0 0.467976 -0.038649 -0.295344 -1.824726 L
1 -0.358893 1.404453 0.704965 -0.200638 B
2 -0.501840 0.659254 -0.421691 -0.057688 G
3 0.204886 1.074134 1.388361 -0.982404 R
4 0.354628 -0.133116 0.283763 -0.837063 Q
5 1.817480 0.742273 0.419395 -2.251035 Q

In [33]:
# Reload ex5 with default settings into `data`.
data = pd.read_csv(filepath_or_buffer='ch06/ex5.csv')
data


Out[33]:
something a b c d message
0 one 1 2 3 4 NaN
1 two 5 6 NaN 8 world
2 three 9 10 11 12 foo

In [35]:
# Disabled example: would write `data` to standard output using '|' as the delimiter.
# NOTE(review): `sys` is not imported in the visible cells; add `import sys` before re-enabling.
#data.to_csv(sys.stdout, sep='|')

In [36]:
# Print the raw file via the shell; every field is quoted and the last row has an extra value.
!cat ch06/ex7.csv


"a","b","c"
"1","2","3"
"1","2","3","4"

In [37]:
import csv

# Read the file's lines inside a `with` block so the handle is closed right
# away instead of staying open for the rest of the session. csv.reader accepts
# any iterable of lines, so `reader` can still be consumed by later cells.
with open('ch06/ex7.csv') as f:
    reader = csv.reader(f.readlines())

In [38]:
# Walk the CSV reader; each row arrives as a list of strings.
# print(...) with a single argument gives the same output on Python 2 and
# also runs on Python 3, unlike the bare `print line` statement.
for line in reader:
    print(line)


['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3', '4']

In [39]:
# Parse the whole file into a list of rows. The `with` block guarantees the
# file handle is closed once the rows have been materialised.
with open('ch06/ex7.csv') as f:
    lines = list(csv.reader(f))

In [41]:
# The first parsed row holds the column names; everything after it is data.
header = lines[0]
values = lines[1:]

In [45]:
# Transpose the rows into columns with zip(*values), then pair each column
# with its header. zip stops at the shortest row, so the extra fourth value
# in the last row is dropped.
data_dict = dict(zip(header, zip(*values)))
data_dict


Out[45]:
{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}

In [44]:
# Counter-example: without the `*`, zip(values) wraps each whole row in a
# 1-tuple rather than transposing, so headers map to rows and 'c' is lost
# because only two rows exist.
dict(zip(header, zip(values)))


Out[44]:
{'a': (['1', '2', '3'],), 'b': (['1', '2', '3', '4'],)}

In [46]:
import requests

# NOTE: this is the retired Twitter v1 search endpoint; the recorded response
# for this request is HTTP 410 with a "no longer active" error payload.
url = 'http://search.twitter.com/search.json?q=python%20pandas'
# A timeout keeps the cell from hanging indefinitely if the host never answers.
resp = requests.get(url, timeout=10)

In [47]:
# Display the response object; its repr includes the HTTP status code.
resp


Out[47]:
<Response [410]>

In [49]:
import json

# Decode the response body text as JSON and display the resulting object.
body_text = resp.text
json.loads(body_text)


Out[49]:
{u'errors': [{u'code': 64,
   u'message': u'The Twitter REST API v1 is no longer active. Please migrate to API v1.1. https://dev.twitter.com/docs/api/1.1/overview.'}]}

In [ ]: