In [8]:
import pandas as pd


/usr/local/lib/python2.7/dist-packages/pandas-0.13.1-py2.7-linux-i686.egg/pandas/io/gbq.py:10: UserWarning: Module PIL was already imported from /usr/lib/python2.7/dist-packages/PIL/__init__.pyc, but /usr/local/lib/python2.7/dist-packages is being added to sys.path
  import pkg_resources

In [5]:
cd "/home/bakuda/pandas-book/"


/home/bakuda/pandas-book

In [6]:
ls


ch02/  ch03/  ch06/  ch07/  ch08/  ch09/  ch11/  ch13/  flights.csv

In [9]:
# Load the comma-separated example file; with default settings the first row
# of the file supplies the column labels.
df = pd.read_csv('ch06/ex1.csv')

In [10]:
df.shape


Out[10]:
(3, 5)

In [11]:
df


Out[11]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

3 rows × 5 columns


In [15]:
# Equivalent alternative: read_table with an explicit comma separator
#df = pd.read_table('ch06/ex1.csv', sep=',')

In [17]:
# ex2.csv has no header row; header=None keeps the first line as data and
# labels the columns with integers instead.
pd.read_csv('ch06/ex2.csv', header=None)


Out[17]:
0 1 2 3 4
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

3 rows × 5 columns


In [18]:
!cat ch06/ex2.csv


1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo

In [19]:
# Alternative for a header-less file: supply the column labels explicitly.
pd.read_csv('ch06/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])


Out[19]:
a b c d message
0 1 2 3 4 hello
1 5 6 7 8 world
2 9 10 11 12 foo

3 rows × 5 columns


In [20]:
# Column labels for the header-less ex2.csv, kept in a variable for reuse.
names = ['a', 'b', 'c', 'd', 'message']

In [24]:
# Use the column at position 4 ('message') as the row index.
pd.read_csv('ch06/ex2.csv', names=names, index_col=4)
# Equivalent: refer to the index column by name instead of by position
#pd.read_csv('ch06/ex2.csv', names=names, index_col='message')


Out[24]:
a b c d
message
hello 1 2 3 4
world 5 6 7 8
foo 9 10 11 12

3 rows × 4 columns


In [25]:
# Passing a list of column names to index_col builds a two-level
# (hierarchical) row index from key1 and key2.
parsed = pd.read_csv('ch06/csv_mindex.csv', index_col=['key1', 'key2'])

In [26]:
parsed


Out[26]:
value1 value2
key1 key2
one a 1 2
b 3 4
c 5 6
d 7 8
two a 9 10
b 11 12
c 13 14
d 15 16

8 rows × 2 columns


In [27]:
!cat ch06/csv_mindex.csv


key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16

In [28]:
# Peek at the raw lines of ex3.txt. The context manager closes the file
# handle as soon as the lines are read instead of leaving it open until the
# garbage collector gets to it.
with open('ch06/ex3.txt') as ex3_file:
    ex3_lines = list(ex3_file)
ex3_lines


Out[28]:
['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']

In [30]:
!cat ch06/ex3.txt


            A         B         C
aaa -0.264438 -1.026059 -0.619500
bbb  0.927272  0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382  1.100491

In [35]:
# Fields in ex3.txt are separated by a variable amount of whitespace, so the
# delimiter is given as a regular expression. The raw string keeps the
# backslash literal; a plain '\s' is an invalid escape sequence that newer
# Python versions warn about.
pd.read_csv('ch06/ex3.txt', sep=r'\s+')


Out[35]:
A B C
aaa -0.264438 -1.026059 -0.619500
bbb 0.927272 0.302904 -0.032399
ccc -0.264273 -0.386314 -0.217601
ddd -0.871858 -0.348382 1.100491

4 rows × 3 columns


In [39]:
# Read only the first `nrows` rows of the file (here 4) instead of all of it.
pd.read_csv('ch06/ex6.csv', nrows=4)


Out[39]:
one two three four key
0 0.467976 -0.038649 -0.295344 -1.824726 L
1 -0.358893 1.404453 0.704965 -0.200638 B
2 -0.501840 0.659254 -0.421691 -0.057688 G
3 0.204886 1.074134 1.388361 -0.982404 R

4 rows × 5 columns


In [50]:
# Count how often each value of the 'key' column occurs in ex6.csv without
# loading the whole file at once: with chunksize set, read_csv returns an
# iterator that yields the file in pieces of up to 1000 rows.
chunker = pd.read_csv('ch06/ex6.csv', chunksize=1000)

# Explicit dtype: the default dtype of an empty Series differs between pandas
# versions (and some versions warn when it is omitted).
tot = pd.Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 treats a key missing from either side as a count of zero
    # instead of producing NaN.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)

# Most frequent keys first. Series.order was superseded by sort_values in
# later pandas releases and then removed, so use whichever is available.
if hasattr(tot, 'sort_values'):
    tot = tot.sort_values(ascending=False)
else:
    tot = tot.order(ascending=False)

In [51]:
#tot

In [ ]: