notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np
%matplotlib inline



In [2]:

    
pwd









    Out[2]:





u'/home/topo/repos/ghub/pynotebooks'



In [3]:

    
cd /home/topo/repos/ghub/pydata-book









    



/home/topo/repos/ghub/pydata-book



In [4]:

    
# Read ex2.csv with header=None, so no line of the file is used as column names.
pd.read_csv('ch06/ex2.csv', header=None)



In [5]:

    
# Read ex2.csv again, this time supplying explicit column labels through `names`.
pd.read_csv('ch06/ex2.csv', names=['a', 'b', 'c', 'd', 'message'])



In [6]:

    
# Column labels for ex2.csv, kept in a variable so the following cells can reuse them.
names = list('abcd') + ['message']



In [7]:

    
# Label the columns with `names` and use the 'message' column as the row index.
pd.read_csv('ch06/ex2.csv', names=names, index_col='message')



In [8]:

    
# Same read, but with column 'a' as the row index instead of 'message'.
pd.read_csv('ch06/ex2.csv', names=names, index_col='a')



In [9]:

    
# Print the raw file: two key columns (key1, key2) followed by two value columns.
!cat ch06/csv_mindex.csv









    



key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16



In [10]:

    
# Passing a list to index_col uses both key columns together as the row index.
parsed = pd.read_csv('ch06/csv_mindex.csv', index_col=['key1', 'key2'])



In [11]:

    
parsed



In [12]:

    
# Peek at the raw lines of ex3.txt: fields are separated by a varying number of spaces.
# The `with` block closes the file handle as soon as the lines have been read.
with open('ch06/ex3.txt') as ex3_file:
    ex3_lines = list(ex3_file)
ex3_lines









    Out[12]:





['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']



In [13]:

    
# Fields are separated by runs of whitespace, so split on the regex \s+.
# The pattern is a raw string so the backslash reaches the regex engine untouched
# instead of being interpreted as an (invalid) string escape sequence.
pd.read_table('ch06/ex3.txt', sep=r'\s+')



In [14]:

    
# Print the raw file: '#' comment lines sit on rows 0, 2 and 3, around the header row.
!cat ch06/ex4.csv









    



# hey!
a,b,c,d,message
# just wanted to make things more difficult for you
# who reads CSV files with computers, anyway?
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo



In [15]:

    
# Naive read: the '#' lines are not skipped, so the first one becomes the header row
# and the others show up as data rows padded with NaN (see the output below).
pd.read_table('ch06/ex4.csv', sep=',')









    Out[15]:






  
    
      
      
      
      
      # hey!
    
  
  
    
      a
      b
      c
      d
      message
    
    
      # just wanted to make things more difficult for you
      NaN
      NaN
      NaN
      NaN
    
    
      # who reads CSV files with computers
      anyway?
      NaN
      NaN
      NaN
    
    
      1
      2
      3
      4
      hello
    
    
      5
      6
      7
      8
      world
    
    
      9
      10
      11
      12
      foo



In [16]:

    
# Skip the three '#' comment lines (file rows 0, 2 and 3) so row 1 becomes the header.
# read_csv already defaults to a comma separator, so no `sep` argument is needed.
pd.read_csv('ch06/ex4.csv', skiprows=[0, 2, 3])



In [17]:

    
# Print the raw file: it contains a literal NA string and one empty field.
!cat ch06/ex5.csv









    



something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo



In [20]:

    
# Read ex5.csv with default settings; the frame is displayed and reused by the next cell.
res = pd.read_csv('ch06/ex5.csv')
res



In [21]:

    
# Boolean mask of missing entries: True wherever `res` holds a missing value.
res.isnull()









    Out[21]:






  
    
      
      something
      a
      b
      c
      d
      message
    
  
  
    
      0
      False
      False
      False
      False
      False
      True
    
    
      1
      False
      False
      False
      True
      False
      False
    
    
      2
      False
      False
      False
      False
      False
      False



In [25]:

    
# Explicitly list 'NA' as a missing-value marker.
# NOTE(review): the default read above already reports the 'NA' cell as missing
# (see the pd.isnull output), so this argument appears redundant here — confirm.
result = pd.read_csv('ch06/ex5.csv', na_values=['NA'])
result



In [26]:

    
# Per-column missing-value markers: column name -> strings to be read as NA.
sentinels = dict(message=['foo', 'NA'], something=['two'])
sentinels









    Out[26]:





{'message': ['foo', 'NA'], 'something': ['two']}



In [27]:

    
# Custom NA treatment: pass the per-column marker lists in `sentinels` as na_values,
# so each list of markers is tied to the column it is keyed under.
pd.read_csv('ch06/ex5.csv', na_values=sentinels)



In [28]:

    
# NOTE: rebinds `result` (previously the ex5.csv frame) to the contents of ex6.csv.
result = pd.read_csv('ch06/ex6.csv')



In [30]:

    
result.head()



In [32]:

    
# Read only the first 6 data rows rather than loading the whole file.
pd.read_csv('ch06/ex6.csv', nrows=6)



In [33]:

    
# Reload ex5.csv under the name `data` (same file and default options as `res` above).
data = pd.read_csv('ch06/ex5.csv')
data



In [35]:

    
# Preview the delimited text that to_csv produces by writing it to stdout,
# using '|' as the field separator. `sys` is imported here so the cell does not
# depend on an import made elsewhere.
import sys
data.to_csv(sys.stdout, sep='|')



In [36]:

    
# Print the raw file: every field is quoted and the last row has one extra field.
!cat ch06/ex7.csv









    



"a","b","c"
"1","2","3"
"1","2","3","4"



In [37]:

    
import csv
# The handle is kept in `f` rather than a `with` block because the reader built
# on top of it is consumed in the next cell.
f = open('ch06/ex7.csv')
reader = csv.reader(f)



In [38]:

    
# Walk the reader created in the previous cell; each row arrives as a list of strings.
for line in reader:
    # The function-call form of print works on both Python 2 and Python 3.
    print(line)
# The reader is exhausted at this point, so release the underlying file handle.
f.close()









    



['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3', '4']



In [39]:

    
# Read every row of ex7.csv into a list of string lists.
# The `with` block closes the file once the reader has been drained.
with open('ch06/ex7.csv') as ex7_file:
    lines = list(csv.reader(ex7_file))



In [41]:

    
# The first row holds the column names; everything after it is data.
header = lines[0]
values = lines[1:]



In [45]:

    
# Transpose the data rows into per-column tuples and key them by header name.
# zip stops at the shortest row, so the extra 4th field of the ragged last row is dropped.
data_dict = dict(zip(header, zip(*values)))
data_dict









    Out[45]:





{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}



In [44]:

    
# Variant without the `*` unpacking: zip(values) wraps each whole row in a 1-tuple,
# so header names are paired with entire rows rather than with columns
# (compare the output below with the transposed result of zip(*values)).
{h:v for h,v in zip(header, zip(values))}









    Out[44]:





{'a': (['1', '2', '3'],), 'b': (['1', '2', '3', '4'],)}



In [46]:

    
# Query the Twitter search endpoint for tweets mentioning "python pandas".
# NOTE: this v1 endpoint has been retired; the response recorded below is
# HTTP 410 with an error payload that points to API v1.1.
import requests
url = 'http://search.twitter.com/search.json?q=python%20pandas'
# A timeout keeps the cell from hanging indefinitely if the host never answers.
resp = requests.get(url, timeout=10)



In [47]:

    
resp









    Out[47]:





<Response [410]>



In [49]:

    
import json
# Decode the response body as JSON; the 410 reply still carries a JSON error document.
json.loads(resp.text)









    Out[49]:





{u'errors': [{u'code': 64,
   u'message': u'The Twitter REST API v1 is no longer active. Please migrate to API v1.1. https://dev.twitter.com/docs/api/1.1/overview.'}]}



In [ ]:

	one	two	three	four	key
0	0.467976	-0.038649	-0.295344	-1.824726	L
1	-0.358893	1.404453	0.704965	-0.200638	B
2	-0.501840	0.659254	-0.421691	-0.057688	G
3	0.204886	1.074134	1.388361	-0.982404	R
4	0.354628	-0.133116	0.283763	-0.837063	Q

	one	two	three	four	key
0	0.467976	-0.038649	-0.295344	-1.824726	L
1	-0.358893	1.404453	0.704965	-0.200638	B
2	-0.501840	0.659254	-0.421691	-0.057688	G
3	0.204886	1.074134	1.388361	-0.982404	R
4	0.354628	-0.133116	0.283763	-0.837063	Q
5	1.817480	0.742273	0.419395	-2.251035	Q

	A	B	C
aaa	-0.264438	-1.026059	-0.619500
bbb	0.927272	0.302904	-0.032399
ccc	-0.264273	-0.386314	-0.217601
ddd	-0.871858	-0.348382	1.100491

				# hey!
a	b	c	d	message
# just wanted to make things more difficult for you	NaN	NaN	NaN	NaN
# who reads CSV files with computers	anyway?	NaN	NaN	NaN
1	2	3	4	hello
5	6	7	8	world
9	10	11	12	foo

	something	a	b	c	d	message
0	False	False	False	False	False	True
1	False	False	False	True	False	False
2	False	False	False	False	False	False