In [22]:
import numpy as np
import pandas as pd
import os
import sys
import matplotlib
%matplotlib inline
from pandas import Series, DataFrame
In [23]:
# Show the raw text of ex1.csv. Plain Python is used instead of a shell
# command (`cat` on Unix, `type` on Windows) so the cell runs unchanged on
# every platform.
with open('../data/ch3/ex1.csv') as fh:
    print(fh.read())
In [24]:
# Evaluate the installed pandas version string (shown as the cell output).
pd.__version__
Out[24]:
In [25]:
# Parse ex1.csv with read_csv's default settings and display the frame.
ex1_path = '../data/ch3/ex1.csv'
data = pd.read_csv(ex1_path)
data
Out[25]:
In [26]:
# Show the raw text of ex2.csv. Plain Python is used instead of a shell
# command (`cat` on Unix, `type` on Windows) so the cell runs unchanged on
# every platform.
with open('../data/ch3/ex2.csv') as fh:
    print(fh.read())
In [27]:
# header=None makes pandas treat the first line of the file as data and
# number the columns automatically (ex2.csv presumably has no header row).
# Passing names=['a', 'b', 'c', 'd', 'message'] instead would label the
# columns explicitly.
ex2_path = '../data/ch3/ex2.csv'
data3 = pd.read_csv(ex2_path, header=None)
data3
Out[27]:
In [28]:
# Load only a subset of ex1.csv's columns by naming them in usecols.
wanted_cols = ['a', 'b', 'c']
datan = pd.read_csv('../data/ch3/ex1.csv', usecols=wanted_cols)
datan
Out[28]:
In [29]:
# Supply the column labels explicitly and use the 'message' column as the
# row index of the resulting frame.
names = list('abcd') + ['message']
data4 = pd.read_csv(
    '../data/ch3/ex2.csv',
    names=names,
    index_col='message',
)
data4
Out[29]:
In [30]:
# Attribute-style access to column 'a' of `data`.
data.a
Out[30]:
In [31]:
# Bracket-style access to column 'a' of `data`.
data['a']
Out[31]:
In [32]:
# Select columns by position: data.columns[[1, 2]] yields the labels at
# positions 1 and 2, which are then used to index the frame.
data[data.columns[[1, 2]]]
Out[32]:
In [33]:
# Show the raw text of ex5.csv. Plain Python is used instead of a shell
# command (`cat` on Unix, `type` on Windows) so the cell runs unchanged on
# every platform.
with open('../data/ch3/ex5.csv') as fh:
    print(fh.read())
In [34]:
data5 = pd.read_csv('../data/ch3/ex5.csv')
# Print only the rows that contain at least one missing value.
# isnull() gives a boolean frame; any(axis=1) collapses it to one flag per
# row, which is a proper 1-D row mask. (Indexing with the 2-D
# `isnull().values == True` array relies on incidental behaviour and can
# repeat a row once per missing cell.)
# print(...) with a single argument works on both Python 2 and Python 3.
print(data5[data5.isnull().any(axis=1)])
In [35]:
# Re-read ex5.csv, this time also treating the string 'world' as a
# missing-value marker in every column.
extra_na = ['world']
data6 = pd.read_csv('../data/ch3/ex5.csv', na_values=extra_na)
data6
Out[35]:
In [36]:
# Per-column missing-value markers: the dict maps a column name to the
# strings that should be read as NA in that column only.
sentinels = dict(message=['foo', 'NA'], something=['two'])
data7 = pd.read_csv('../data/ch3/ex5.csv', na_values=sentinels)
data7
Out[36]:
In [37]:
# Read only the first few rows of ex6.csv instead of the whole file.
row_limit = 5
data8 = pd.read_csv('../data/ch3/ex6.csv', nrows=row_limit)
data8
Out[37]:
In [38]:
# chunksize specifies the number of rows in each chunk; read_csv then
# returns an iterator of DataFrames instead of a single frame.
chunker = pd.read_csv('../data/ch3/ex6.csv', chunksize=1000)

# Running tally of how often each value of the 'key' column occurs.
# The dtype is given explicitly: an empty Series without one triggers a
# deprecation warning / object dtype on newer pandas versions.
tot = Series([], dtype='float64')
for piece in chunker:
    # fill_value=0 lets keys that are missing on either side count as 0
    # instead of producing NaN.
    tot = tot.add(piece['key'].value_counts(), fill_value=0)

# Most frequent keys first.
tot = tot.sort_values(ascending=False)
tot
Out[38]:
In [39]:
# Write `data` to ch3-out1.csv in the working directory using to_csv's
# default options.
data.to_csv('ch3-out1.csv')
In [40]:
# Same frame, written without the row index.
data.to_csv('ch3-out2.csv', index=False)
# Only columns a, b and c, with neither row index nor header line.
data.to_csv('ch3-out3.csv', index=False, header=False, columns=['a', 'b', 'c'])
# Tab-separated dump of data5. NOTE: no header line is written, so anything
# that reads this file back must pass header=None.
data5.to_csv('../data/ch3/ex1.tsv', index=False, header=False, sep='\t')
# Echo the written file with plain Python rather than the Unix-only `cat`
# shell command, so the cell also runs on Windows.
with open('../data/ch3/ex1.tsv') as fh:
    print(fh.read())
In [41]:
# ex1.tsv is produced by to_csv(..., header=False), i.e. it has no header
# line. header=None is therefore required; with the default setting the
# first data row would silently be consumed as the column names.
data = pd.read_csv('../data/ch3/ex1.tsv', sep='\t', header=None)
data
Out[41]:
In [42]:
# read_table uses a tab separator by default. ex1.tsv is produced by
# to_csv(..., header=False), i.e. it has no header line, so header=None is
# required; with the default setting the first data row would silently be
# consumed as the column names.
data = pd.read_table('../data/ch3/ex1.tsv', header=None)
data
Out[42]:
In [43]:
# Tab-separated dump of data5 in which missing values are written as the
# literal string 'NULL'; neither row index nor header line is emitted.
data5.to_csv('ch3-out4.tsv', sep='\t', na_rep='NULL', index=False, header=False)
# Echo the written file with plain Python rather than the Unix-only `cat`
# shell command, so the cell also runs on Windows.
with open('ch3-out4.tsv') as fh:
    print(fh.read())