notebook.community

Edit and run



In [8]:

    
import pandas as pd
import numpy as np
from pandas import DataFrame, Series



In [9]:

    
df = pd.read_csv('ex1.csv')
df



In [10]:

    
pd.read_table('ex1.csv', sep=',')



In [13]:

    
pd.read_csv('ex2.csv', header=None)



In [14]:

    
names = ['a', 'b', 'c', 'd', 'message']
pd.read_csv('ex2.csv', names = names, index_col = 'message')



In [18]:

    
parsed = pd.read_csv('csv_minindex.csv', index_col = ['key1', 'key2'])
parsed



In [19]:

    
list(open('ex3.txt'))









    Out[19]:





['        A      B      C\n',
 'aaa -0.22 -3.123  1.234\n',
 'bbb  0.92  3.123 -3.333\n',
 'ccc  1.22  1.222  3.444\n',
 'ddd  12.1  6.666  6.666\n']



In [21]:

    
result = pd.read_table('ex3.txt', sep='\s+')
result



In [22]:

    
pd.read_csv('ex4.csv', skiprows=[0, 2, 3])



In [23]:

    
result = pd.read_csv('ex5.csv')
result



In [24]:

    
pd.isnull(result)









    Out[24]:






  
    
      
      something
      a
      b
      c
      d
      message
    
  
  
    
      0
      False
      False
      False
      False
      False
      True
    
    
      1
      False
      False
      False
      True
      False
      False
    
    
      2
      False
      False
      False
      False
      False
      False



In [30]:

    
result = pd.read_csv('ex5.csv', na_values=['NULL'])
result



In [32]:

    
sentinels = {'message' : ['foo', 'NA'],
             'something' : ['two']}
pd.read_csv('ex5.csv', na_values=sentinels)



In [33]:

    
result = pd.read_csv('ex6.csv')
result









    Out[33]:






  
    
      
      one
      two
      three
      four
      key
    
  
  
    
      0
      0.467976
      -0.038649
      -0.295344
      -1.824726
      L
    
    
      1
      -0.358893
      1.404453
      0.704965
      -0.200638
      B
    
    
      2
      -0.501840
      0.659254
      -0.421691
      -0.057688
      G
    
    
      3
      0.204886
      1.074134
      1.388361
      -0.982404
      R
    
    
      4
      0.354628
      -0.133116
      0.283763
      -0.837063
      Q
    
    
      5
      1.817480
      0.742273
      0.419395
      -2.251035
      Q
    
    
      6
      -0.776764
      0.935518
      -0.332872
      -1.875641
      U
    
    
      7
      -0.913135
      1.530624
      -0.572657
      0.477252
      K
    
    
      8
      0.358480
      -0.497572
      -0.367016
      0.507702
      S
    
    
      9
      -1.740877
      -1.160417
      -1.637830
      2.172201
      G
    
    
      10
      0.240564
      -0.328249
      1.252155
      1.072796
      8
    
    
      11
      0.764018
      1.165476
      -0.639544
      1.495258
      R
    
    
      12
      0.571035
      -0.310537
      0.582437
      -0.298765
      1
    
    
      13
      2.317658
      0.430710
      -1.334216
      0.199679
      P
    
    
      14
      1.547771
      -1.119753
      -2.277634
      0.329586
      J
    
    
      15
      -1.310608
      0.401719
      -1.000987
      1.156708
      E
    
    
      16
      -0.088496
      0.634712
      0.153324
      0.415335
      B
    
    
      17
      -0.018663
      -0.247487
      -1.446522
      0.750938
      A
    
    
      18
      -0.070127
      -1.579097
      0.120892
      0.671432
      F
    
    
      19
      -0.194678
      -0.492039
      2.359605
      0.319810
      H
    
    
      20
      -0.248618
      0.868707
      -0.492226
      -0.717959
      W
    
    
      21
      -1.091549
      -0.867110
      -0.647760
      -0.832562
      C
    
    
      22
      0.641404
      -0.138822
      -0.621963
      -0.284839
      C
    
    
      23
      1.216408
      0.992687
      0.165162
      -0.069619
      V
    
    
      24
      -0.564474
      0.792832
      0.747053
      0.571675
      I
    
    
      25
      1.759879
      -0.515666
      -0.230481
      1.362317
      S
    
    
      26
      0.126266
      0.309281
      0.382820
      -0.239199
      L
    
    
      27
      1.334360
      -0.100152
      -0.840731
      -0.643967
      6
    
    
      28
      -0.737620
      0.278087
      -0.053235
      -0.950972
      J
    
    
      29
      -1.148486
      -0.986292
      -0.144963
      0.124362
      Y
    
    
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      9970
      0.633495
      -0.186524
      0.927627
      0.143164
      4
    
    
      9971
      0.308636
      -0.112857
      0.762842
      -1.072977
      1
    
    
      9972
      -1.627051
      -0.978151
      0.154745
      -1.229037
      Z
    
    
      9973
      0.314847
      0.097989
      0.199608
      0.955193
      P
    
    
      9974
      1.666907
      0.992005
      0.496128
      -0.686391
      S
    
    
      9975
      0.010603
      0.708540
      -1.258711
      0.226541
      K
    
    
      9976
      0.118693
      -0.714455
      -0.501342
      -0.254764
      K
    
    
      9977
      0.302616
      -2.011527
      -0.628085
      0.768827
      H
    
    
      9978
      -0.098572
      1.769086
      -0.215027
      -0.053076
      A
    
    
      9979
      -0.019058
      1.964994
      0.738538
      -0.883776
      F
    
    
      9980
      -0.595349
      0.001781
      -1.423355
      -1.458477
      M
    
    
      9981
      1.392170
      -1.396560
      -1.425306
      -0.847535
      H
    
    
      9982
      -0.896029
      -0.152287
      1.924483
      0.365184
      6
    
    
      9983
      -2.274642
      -0.901874
      1.500352
      0.996541
      N
    
    
      9984
      -0.301898
      1.019906
      1.102160
      2.624526
      I
    
    
      9985
      -2.548389
      -0.585374
      1.496201
      -0.718815
      D
    
    
      9986
      -0.064588
      0.759292
      -1.568415
      -0.420933
      E
    
    
      9987
      -0.143365
      -1.111760
      -1.815581
      0.435274
      2
    
    
      9988
      -0.070412
      -1.055921
      0.338017
      -0.440763
      X
    
    
      9989
      0.649148
      0.994273
      -1.384227
      0.485120
      Q
    
    
      9990
      -0.370769
      0.404356
      -1.051628
      -1.050899
      8
    
    
      9991
      -0.409980
      0.155627
      -0.818990
      1.277350
      W
    
    
      9992
      0.301214
      -1.111203
      0.668258
      0.671922
      A
    
    
      9993
      1.821117
      0.416445
      0.173874
      0.505118
      X
    
    
      9994
      0.068804
      1.322759
      0.802346
      0.223618
      H
    
    
      9995
      2.311896
      -0.417070
      -1.409599
      -0.515821
      L
    
    
      9996
      -0.479893
      -0.650419
      0.745152
      -0.646038
      E
    
    
      9997
      0.523331
      0.787112
      0.486066
      1.093156
      K
    
    
      9998
      -0.362559
      0.598894
      -1.843201
      0.887292
      G
    
    
      9999
      -0.096376
      -1.012999
      -0.657431
      -0.573315
      0
    
  

10000 rows × 5 columns



In [34]:

    
pd.read_csv('ex6.csv', nrows=5)



In [36]:

    
chunker = pd.read_csv('ex6.csv', chunksize=1000)
chunker









    Out[36]:





<pandas.io.parsers.TextFileReader at 0x10c34b2e8>



In [39]:

    
tot = Series([])
for piece in pd.read_csv('ex6.csv', chunksize=1000):
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
tot = tot.sort_values(ascending = False)
tot









    Out[39]:





E    368.0
X    364.0
L    346.0
O    343.0
Q    340.0
M    338.0
J    337.0
F    335.0
K    334.0
H    330.0
V    328.0
I    327.0
U    326.0
P    324.0
D    320.0
A    320.0
R    318.0
Y    314.0
G    308.0
S    308.0
N    306.0
W    305.0
T    304.0
B    302.0
Z    288.0
C    286.0
4    171.0
6    166.0
7    164.0
8    162.0
3    162.0
5    157.0
2    152.0
0    151.0
9    150.0
1    146.0
dtype: float64



In [40]:

    
data = pd.read_csv('ex5.csv')
data



In [41]:

    
data.to_csv('out.csv')



In [42]:

    
!cat out.csv









    



,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo



In [44]:

    
import sys
data.to_csv(sys.stdout, sep='|')









    



|something|a|b|c|d|message
0|one|1|2|3.0|4|
1|two|5|6||8|world
2|three|9|10|11.0|12|foo



In [47]:

    
data.to_csv(sys.stdout, na_rep='NULL')









    



,something,a,b,c,d,message
0,one,1,2,3.0,4,NULL
1,two,5,6,NULL,8,world
2,three,9,10,11.0,12,foo



In [48]:

    
data.to_csv(sys.stdout, index = False, header = False)









    



one,1,2,3.0,4,
two,5,6,,8,world
three,9,10,11.0,12,foo



In [50]:

    
data.to_csv(sys.stdout, index = False, columns = ['a', 'b', 'c'])









    



a,b,c
1,2,3.0
5,6,
9,10,11.0



In [52]:

    
dates = pd.date_range('1/1/2000', periods = 7)
dates









    Out[52]:





DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04',
               '2000-01-05', '2000-01-06', '2000-01-07'],
              dtype='datetime64[ns]', freq='D')



In [54]:

    
ts = Series(np.arange(7), index=dates)
ts









    Out[54]:





2000-01-01    0
2000-01-02    1
2000-01-03    2
2000-01-04    3
2000-01-05    4
2000-01-06    5
2000-01-07    6
Freq: D, dtype: int64



In [55]:

    
ts.to_csv('tseries.csv')



In [56]:

    
!cat tseries.csv









    



2000-01-01,0
2000-01-02,1
2000-01-03,2
2000-01-04,3
2000-01-05,4
2000-01-06,5
2000-01-07,6



In [57]:

    
Series.from_csv('tseries.csv', parse_dates=True)









    Out[57]:





2000-01-01    0
2000-01-02    1
2000-01-03    2
2000-01-04    3
2000-01-05    4
2000-01-06    5
2000-01-07    6
dtype: int64



In [59]:

    
import csv
f = open('ex7.csv')
reader = csv.reader(f)
for line in reader:
    print(line)









    



['a', 'b', 'c']
['1', '2', '3']
['1', '2', '3', '4']



In [61]:

    
lines = list(csv.reader(open('ex7.csv')))
lines









    Out[61]:





[['a', 'b', 'c'], ['1', '2', '3'], ['1', '2', '3', '4']]



In [63]:

    
header, values = lines[0], lines[1:]
data_dict = {h : v for h, v in zip(header, zip(*values))}
data_dict









    Out[63]:





{'a': ('1', '1'), 'b': ('2', '2'), 'c': ('3', '3')}



In [64]:

    
obj = """
{"name": "Wes",
 "places_lived": ["United States", "Spain", "Germany"],
 "pet": null,
 "siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
              {"name": "Katie", "age": 33, "pet": "Cisco"}]
}
"""



In [66]:

    
import json
result = json.loads(obj)
result









    Out[66]:





{'name': 'Wes',
 'pet': None,
 'places_lived': ['United States', 'Spain', 'Germany'],
 'siblings': [{'age': 25, 'name': 'Scott', 'pet': 'Zuko'},
  {'age': 33, 'name': 'Katie', 'pet': 'Cisco'}]}



In [67]:

    
asjson = json.dumps(result)
asjson









    Out[67]:





'{"name": "Wes", "places_lived": ["United States", "Spain", "Germany"], "pet": null, "siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"}, {"name": "Katie", "age": 33, "pet": "Cisco"}]}'



In [69]:

    
siblings = DataFrame(result['siblings'], columns = ['name', 'age'])
siblings



In [71]:

    
from lxml.html import parse
from urllib2 import urlopen

parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))
doc = parsed.getroot()
doc









    



---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-71-88d78579c550> in <module>()
      1 from lxml.html import parse
----> 2 from urllib2 import urlopen
      3 
      4 parsed = parse(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options'))
      5 doc = parsed.getroot()

ModuleNotFoundError: No module named 'urllib2'



In [74]:

    
frame = pd.read_csv('ex1.csv')



In [ ]:

	A	B	C
aaa	-0.22	-3.123	1.234
bbb	0.92	3.123	-3.333
ccc	1.22	1.222	3.444
ddd	12.10	6.666	6.666

	something	a	b	c	d	message
0	False	False	False	False	False	True
1	False	False	False	True	False	False
2	False	False	False	False	False	False

	one	two	three	four	key
0	0.467976	-0.038649	-0.295344	-1.824726	L
1	-0.358893	1.404453	0.704965	-0.200638	B
2	-0.501840	0.659254	-0.421691	-0.057688	G
3	0.204886	1.074134	1.388361	-0.982404	R
4	0.354628	-0.133116	0.283763	-0.837063	Q
5	1.817480	0.742273	0.419395	-2.251035	Q
6	-0.776764	0.935518	-0.332872	-1.875641	U
7	-0.913135	1.530624	-0.572657	0.477252	K
8	0.358480	-0.497572	-0.367016	0.507702	S
9	-1.740877	-1.160417	-1.637830	2.172201	G
10	0.240564	-0.328249	1.252155	1.072796	8
11	0.764018	1.165476	-0.639544	1.495258	R
12	0.571035	-0.310537	0.582437	-0.298765	1
13	2.317658	0.430710	-1.334216	0.199679	P
14	1.547771	-1.119753	-2.277634	0.329586	J
15	-1.310608	0.401719	-1.000987	1.156708	E
16	-0.088496	0.634712	0.153324	0.415335	B
17	-0.018663	-0.247487	-1.446522	0.750938	A
18	-0.070127	-1.579097	0.120892	0.671432	F
19	-0.194678	-0.492039	2.359605	0.319810	H
20	-0.248618	0.868707	-0.492226	-0.717959	W
21	-1.091549	-0.867110	-0.647760	-0.832562	C
22	0.641404	-0.138822	-0.621963	-0.284839	C
23	1.216408	0.992687	0.165162	-0.069619	V
24	-0.564474	0.792832	0.747053	0.571675	I
25	1.759879	-0.515666	-0.230481	1.362317	S
26	0.126266	0.309281	0.382820	-0.239199	L
27	1.334360	-0.100152	-0.840731	-0.643967	6
28	-0.737620	0.278087	-0.053235	-0.950972	J
29	-1.148486	-0.986292	-0.144963	0.124362	Y
...	...	...	...	...	...
9970	0.633495	-0.186524	0.927627	0.143164	4
9971	0.308636	-0.112857	0.762842	-1.072977	1
9972	-1.627051	-0.978151	0.154745	-1.229037	Z
9973	0.314847	0.097989	0.199608	0.955193	P
9974	1.666907	0.992005	0.496128	-0.686391	S
9975	0.010603	0.708540	-1.258711	0.226541	K
9976	0.118693	-0.714455	-0.501342	-0.254764	K
9977	0.302616	-2.011527	-0.628085	0.768827	H
9978	-0.098572	1.769086	-0.215027	-0.053076	A
9979	-0.019058	1.964994	0.738538	-0.883776	F
9980	-0.595349	0.001781	-1.423355	-1.458477	M
9981	1.392170	-1.396560	-1.425306	-0.847535	H
9982	-0.896029	-0.152287	1.924483	0.365184	6
9983	-2.274642	-0.901874	1.500352	0.996541	N
9984	-0.301898	1.019906	1.102160	2.624526	I
9985	-2.548389	-0.585374	1.496201	-0.718815	D
9986	-0.064588	0.759292	-1.568415	-0.420933	E
9987	-0.143365	-1.111760	-1.815581	0.435274	2
9988	-0.070412	-1.055921	0.338017	-0.440763	X
9989	0.649148	0.994273	-1.384227	0.485120	Q
9990	-0.370769	0.404356	-1.051628	-1.050899	8
9991	-0.409980	0.155627	-0.818990	1.277350	W
9992	0.301214	-1.111203	0.668258	0.671922	A
9993	1.821117	0.416445	0.173874	0.505118	X
9994	0.068804	1.322759	0.802346	0.223618	H
9995	2.311896	-0.417070	-1.409599	-0.515821	L
9996	-0.479893	-0.650419	0.745152	-0.646038	E
9997	0.523331	0.787112	0.486066	1.093156	K
9998	-0.362559	0.598894	-1.843201	0.887292	G
9999	-0.096376	-1.012999	-0.657431	-0.573315	0