We will focus on three formats for storing our data on disk:
pandas provides functions to load csv, xls and pickle files:
We will see some examples of file loading:
In [3]:
import pandas as pd
# Remote location of the iqsize dataset, kept in a name so the read call stays short.
IQSIZE_URL = "https://raw.githubusercontent.com/f-guitart/data_mining/master/data/iqsize.csv"

# read_csv is given the URL directly instead of a local path.
iqsize = pd.read_csv(IQSIZE_URL)

# Show only the first rows rather than the whole table.
iqsize.head()
Out[3]:
In [4]:
# Inspect the type of the object that read_csv returned.
type(iqsize)
Out[4]:
In [6]:
# Peek at the first ten entries of the "sex" column.
iqsize["sex"].head(10)
Out[6]:
In [9]:
iqsize["sex"].to_csv("myseries.csv")
%ls myseries.csv
In [10]:
# Write the iqsize data to an Excel workbook in the working directory.
iqsize.to_excel("iqsize.xlsx")
# List the file to confirm it was written.
%ls iqsize.xlsx
In [11]:
# Load the "iqsize.xlsx" workbook back into a new object and preview its first rows.
# NOTE(review): to_excel writes the index by default and read_excel is not told
# to use it here, so the result presumably carries an extra "Unnamed: 0" column;
# pass index_col=0 if an exact round trip is wanted — confirm against the output.
xls_iqsize = pd.read_excel("iqsize.xlsx")
xls_iqsize.head()
Out[11]:
In [17]:
# Column "a" mixes nested dicts with missing values; column "b" is plain integers.
nested_values = [
    {"apples": [1, 2, 3, 4, 6], "pears": 2},
    None,
    None,
    {"bannanas": 4},
]
my_df = pd.DataFrame({"a": nested_values, "b": list(range(4))})

# Persist the frame as CSV (a text format) in the working directory.
my_df.to_csv("mydf.csv")
In [20]:
# Reload the CSV. to_csv writes the index by default, so column 0 of the
# reloaded frame is that index and position [0, 1] is the first cell of "a".
my_df2 = pd.read_csv("mydf.csv")

# Check what Python type that cell has after the CSV round trip.
first_a_value = my_df2.iat[0, 1]
type(first_a_value)
Out[20]:
In [29]:
# Round-trip my_df through a pickle file in the working directory.
pd.to_pickle(my_df, "mydf.pickle")
my_df3 = pd.read_pickle("mydf.pickle")

# Preview the reloaded frame.
my_df3.head()
Out[29]:
In [31]:
# Type of the first cell of the first column in the unpickled frame.
type(my_df3.iat[0, 0])
Out[31]:
In [23]:
# Two small integer Series named as a train/test pair.
train = pd.Series(list(range(1, 9)))
test = pd.Series(list(range(9, 12)))

# Both Series are bundled in a dict so that a single pickle file holds them.
splits = {"train": train, "test": test}
pd.to_pickle(splits, "my_pickle.pickle")
# List the pickle file to confirm it exists in the working directory.
%ls my_pickle.pickle
In [24]:
# Load the pickled object back from disk.
# NOTE: unpickling can execute arbitrary code, so only read pickle files from a trusted source.
my_pickle = pd.read_pickle("my_pickle.pickle")
# Show the keys of the loaded object.
my_pickle.keys()
Out[24]:
In [26]:
# Check the type of the value stored under the "train" key.
type(my_pickle['train'])
Out[26]: