In [8]:
import pandas as pd
import numpy as np
from pandas import DataFrame, Series
In [9]:
# Load the sample CSV into a DataFrame; the comma separator is spelled out
# even though it is read_csv's default.
df = pd.read_csv('ex1.csv', sep=',')
df
Out[9]:
In [10]:
# read_table assumes tab-separated input, so the comma delimiter has to be
# passed explicitly (delimiter is an alias for sep).
pd.read_table(
    'ex1.csv',
    delimiter=',',
)
Out[10]:
In [13]:
# header=None: treat every line as data and let pandas number the columns.
pd.read_csv(
    'ex2.csv',
    header=None,
)
Out[13]:
In [14]:
# Supply the column labels ourselves and use the 'message' column as the row index.
names = list('abcd') + ['message']
pd.read_csv(
    'ex2.csv',
    names=names,
    index_col='message',
)
Out[14]:
In [18]:
# Passing a list of columns as index_col builds a hierarchical row index.
# NOTE(review): the file name may have been meant as 'csv_mindex.csv' - confirm
# against the data directory before changing it.
parsed = pd.read_csv(
    'csv_minindex.csv',
    index_col=['key1', 'key2'],
)
parsed
Out[18]:
In [19]:
# Show the raw lines of ex3.txt. The context manager closes the file handle
# as soon as the lines are read instead of leaving it to the garbage collector.
with open('ex3.txt') as ex3_file:
    ex3_lines = list(ex3_file)
ex3_lines
Out[19]:
In [21]:
# Split fields on runs of whitespace. The pattern is a raw string so that the
# backslash in \s reaches the regex engine untouched; '\s' in a normal string
# is an invalid escape sequence that newer Python versions warn about.
result = pd.read_table('ex3.txt', sep=r'\s+')
result
Out[21]:
In [22]:
# Skip the first, third and fourth lines of the file (0-based line numbers).
pd.read_csv(
    'ex4.csv',
    skiprows=[0, 2, 3],
)
Out[22]:
In [23]:
# Load ex5.csv with default parsing; header='infer' is the documented default
# and is only written out for clarity.
result = pd.read_csv('ex5.csv', header='infer')
result
Out[23]:
In [24]:
# Boolean mask of the same shape as result: True wherever a value is missing.
result.isnull()
Out[24]:
In [30]:
# Treat the literal string 'NULL' as a missing-value marker in every column.
result = pd.read_csv(
    'ex5.csv',
    na_values=['NULL'],
)
result
Out[30]:
In [32]:
# Per-column missing-value markers: column name -> strings to read as NaN.
sentinels = dict(
    message=['foo', 'NA'],
    something=['two'],
)
pd.read_csv(
    'ex5.csv',
    na_values=sentinels,
)
Out[32]:
In [33]:
# Read all of ex6.csv in one go (comma-separated, which is also the default).
result = pd.read_csv('ex6.csv', sep=',')
result
Out[33]:
In [34]:
# Only parse the first five data rows of the file.
pd.read_csv(
    'ex6.csv',
    nrows=5,
)
Out[34]:
In [36]:
# With chunksize set, read_csv hands back a reader that yields the file
# 1000 rows at a time instead of a single DataFrame.
chunker = pd.read_csv(
    'ex6.csv',
    chunksize=1000,
)
chunker
Out[36]:
In [39]:
# Count occurrences of each value in the 'key' column without holding the
# whole file in memory: tally every 1000-row chunk and add it to the total.
# The explicit dtype avoids the ambiguous default dtype of an empty Series.
tot = Series([], dtype='float64')
for piece in pd.read_csv('ex6.csv', chunksize=1000):
    # fill_value=0 treats keys missing from either side as a count of zero.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)
# Sort once, after all chunks have been accumulated.
tot = tot.sort_values(ascending=False)
tot
Out[39]:
In [40]:
# Reload ex5.csv as the frame used by the writing examples; header='infer'
# is read_csv's default and is only made explicit here.
data = pd.read_csv('ex5.csv', header='infer')
data
Out[40]:
In [41]:
# Write the frame to out.csv using to_csv's default options.
data.to_csv('out.csv')
In [42]:
# IPython shell escape: print out.csv (requires a `cat` executable on the PATH).
!cat out.csv
In [44]:
import sys

# Stream the CSV text to standard output, with '|' between fields.
data.to_csv(
    sys.stdout,
    sep='|',
)
In [47]:
# Write missing values as the text 'NULL' instead of an empty field.
data.to_csv(
    sys.stdout,
    na_rep='NULL',
)
In [48]:
# Emit only the values: no row labels and no column header line.
data.to_csv(
    sys.stdout,
    index=False,
    header=False,
)
In [50]:
# Write just columns a, b and c (in that order), without the row labels.
data.to_csv(
    sys.stdout,
    index=False,
    columns=['a', 'b', 'c'],
)
In [52]:
# Seven consecutive daily timestamps starting on 1 January 2000.
dates = pd.date_range(start='1/1/2000', periods=7)
dates
Out[52]:
In [54]:
# Time series holding the integers 0..6, labelled by the dates built above.
ts = Series(
    data=np.arange(7),
    index=dates,
)
ts
Out[54]:
In [55]:
ts.to_csv('tseries.csv')
In [56]:
!cat tseries.csv
In [57]:
Series.from_csv('tseries.csv', parse_dates=True)
Out[57]:
In [59]:
import csv

# Print every parsed row of ex7.csv. The with-block guarantees the file is
# closed once iteration finishes, even if a row fails to parse.
with open('ex7.csv') as f:
    reader = csv.reader(f)
    for line in reader:
        print(line)
In [61]:
# Materialise all parsed rows while the file is open, then release the handle
# (the original left the file object open with nothing referencing it).
with open('ex7.csv') as ex7_file:
    lines = list(csv.reader(ex7_file))
lines
Out[61]:
In [63]:
# First row holds the column names; the remaining rows are the records.
header = lines[0]
values = lines[1:]
# zip(*values) transposes rows into columns, pairing each name with a tuple
# of that column's values.
data_dict = dict(zip(header, zip(*values)))
data_dict
Out[63]:
In [64]:
# Sample JSON document kept as a string: an object with a string, an array of
# strings, a null, and an array of nested objects.
obj = """
{"name": "Wes",
"places_lived": ["United States", "Spain", "Germany"],
"pet": null,
"siblings": [{"name": "Scott", "age": 25, "pet": "Zuko"},
{"name": "Katie", "age": 33, "pet": "Cisco"}]
}
"""
In [66]:
import json
# Parse the JSON text held in obj into Python objects.
result = json.loads(obj)
result
Out[66]:
In [67]:
# Serialise the parsed object back into a JSON string.
asjson = json.dumps(result)
asjson
Out[67]:
In [69]:
# One row per sibling record; only the 'name' and 'age' fields are kept.
siblings = DataFrame(
    data=result['siblings'],
    columns=['name', 'age'],
)
siblings
Out[69]:
In [71]:
from contextlib import closing

from lxml.html import parse

# urllib2 exists only on Python 2; on Python 3 urlopen lives in urllib.request.
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

# NOTE(review): this Yahoo Finance URL dates from the original example and may
# no longer serve the expected page - confirm before relying on it.
# closing() releases the HTTP connection once the document has been parsed.
with closing(urlopen('http://finance.yahoo.com/q/op?s=AAPL+Options')) as response:
    parsed = parse(response)
doc = parsed.getroot()
doc
In [74]:
# Reload the first sample file; the comma separator is read_csv's default and
# is only written out for clarity.
frame = pd.read_csv('ex1.csv', sep=',')
In [ ]: