In [1]:
import pandas as pd
In [2]:
# Read the CSV with default options: pandas uses the first row as column labels.
df = pd.read_csv('data/src/sample.csv')
print(df)
In [3]:
# Show the column labels produced by the default read above.
print(df.columns)
In [4]:
# header=None: no row is treated as a header, so every row is data and the
# columns receive integer labels starting at 0.
df_none = pd.read_csv('data/src/sample.csv', header=None)
print(df_none)
In [5]:
# names= supplies the column labels explicitly; because names are given and
# header is not, no row of the file is consumed as a header.
df_names = pd.read_csv('data/src/sample.csv', names=('A', 'B', 'C', 'D'))
print(df_names)
In [6]:
# Default read of a second file; its first row becomes the column labels
# (presumably this file really starts with a header row -- see its name).
df_header = pd.read_csv('data/src/sample_header.csv')
print(df_header)
In [7]:
# header=0 names the first row as the header explicitly; this matches the
# default behaviour when names= is not passed.
df_header_0 = pd.read_csv('data/src/sample_header.csv', header=0)
print(df_header_0)
In [8]:
# header=2: the row at 0-based position 2 becomes the header and the rows
# before it are dropped; data starts on the following row.
df_header_2 = pd.read_csv('data/src/sample_header.csv', header=2)
print(df_header_2)
In [9]:
# Read without index_col: every column is expected to be loaded as ordinary
# data, leaving the row index as pandas' default.
df_header_index = pd.read_csv('data/src/sample_header_index.csv')
print(df_header_index)
In [10]:
# Inspect the row index that resulted from reading without index_col.
print(df_header_index.index)
In [11]:
# index_col=0 promotes the first column of the file to the row index.
df_header_index_col = pd.read_csv('data/src/sample_header_index.csv', index_col=0)
print(df_header_index_col)
In [12]:
# Inspect the row index built from the first column.
print(df_header_index_col.index)
In [13]:
# usecols with integers selects columns by 0-based position: only the
# columns at positions 1 and 3 are read.
df_none_usecols = pd.read_csv('data/src/sample.csv', header=None, usecols=[1, 3])
print(df_none_usecols)
In [14]:
# A single column is still passed as a list; this rebinds df_none_usecols.
df_none_usecols = pd.read_csv('data/src/sample.csv', header=None, usecols=[2])
print(df_none_usecols)
In [15]:
# usecols with strings selects columns by their header label.
df_header_usecols = pd.read_csv('data/src/sample_header.csv', usecols=['a', 'c'])
print(df_header_usecols)
In [16]:
# usecols also accepts a callable that is evaluated against each column name;
# columns for which it returns True are kept. Here: every column except 'b'.
# The comparison uses != (equality). `is not` tests object identity, which only
# happens to work for short string literals because of interning and triggers
# a SyntaxWarning on Python 3.8+.
df_header_usecols = pd.read_csv(
    'data/src/sample_header.csv',
    usecols=lambda x: x != 'b',
)
print(df_header_usecols)
In [17]:
# Callable usecols: keep each column whose label is not one of 'a' or 'c'.
df_header_usecols = pd.read_csv(
    'data/src/sample_header.csv',
    usecols=lambda label: label not in ['a', 'c'],
)
print(df_header_usecols)
In [18]:
# Combine usecols with index_col. Position 0 is listed in usecols so that the
# column is actually read and can serve as the row index.
df_index_usecols = pd.read_csv(
    'data/src/sample_header_index.csv',
    index_col=0,
    usecols=[0, 1, 3],
)
print(df_index_usecols)
In [19]:
# Re-read the headerless file in full as a baseline for the row-skipping
# examples that follow (rebinds df_none).
df_none = pd.read_csv('data/src/sample.csv', header=None)
print(df_none)
In [20]:
# skiprows with an integer skips that many lines from the top of the file.
df_none = pd.read_csv('data/src/sample.csv', header=None, skiprows=2)
print(df_none)
In [21]:
# skiprows with a list skips exactly the lines at those 0-based positions.
df_none_skiprows = pd.read_csv('data/src/sample.csv', header=None, skiprows=[0, 2])
print(df_none_skiprows)
In [22]:
# To skip one specific line, pass a one-element list: skiprows=[1] skips only
# line 1, whereas skiprows=1 would skip the first line.
df_none_skiprows = pd.read_csv('data/src/sample.csv', header=None, skiprows=[1])
print(df_none_skiprows)
In [23]:
# Callable skiprows: it is called with each 0-based line number and the line is
# skipped when it returns True, so only lines 0 and 2 are read here.
df_none_skiprows = pd.read_csv(
    'data/src/sample.csv',
    header=None,
    skiprows=lambda line_no: line_no not in [0, 2],
)
print(df_none_skiprows)
In [24]:
# With a header present, skiprows still counts physical lines of the file:
# line 0 remains the header and line 1 (the first line after it) is skipped.
df_header_skiprows = pd.read_csv('data/src/sample_header.csv', skiprows=[1])
print(df_header_skiprows)
In [25]:
# Skipping line 0 removes the original header line, so the first line that is
# not skipped is used as the header instead.
df_header_skiprows = pd.read_csv('data/src/sample_header.csv', skiprows=[0, 3])
print(df_header_skiprows)
In [26]:
# skipfooter drops lines from the end of the file. The default C parser does
# not support it, hence the explicit engine='python'.
df_none_skipfooter = pd.read_csv(
    'data/src/sample.csv',
    header=None,
    skipfooter=1,
    engine='python',
)
print(df_none_skipfooter)
In [27]:
# nrows limits how many rows are read: only the first two are loaded.
df_none_nrows = pd.read_csv('data/src/sample.csv', header=None, nrows=2)
print(df_none_nrows)
In [28]:
# Read without a dtype argument so pandas infers each column's type from its
# values; the first column is used as the row index.
df_default = pd.read_csv('data/src/sample_header_index_dtype.csv', index_col=0)
print(df_default)
In [29]:
# Show the dtype pandas inferred for each column.
print(df_default.dtypes)
In [30]:
# Show the Python type of every individual cell.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use map when it exists and fall back to applymap on older pandas.
print(
    (df_default.map if hasattr(df_default, 'map') else df_default.applymap)(type)
)
In [31]:
# dtype=str applies to every data column: values are read as strings rather
# than being converted to numbers.
df_str = pd.read_csv(
    'data/src/sample_header_index_dtype.csv',
    index_col=0,
    dtype=str,
)
print(df_str)
In [32]:
# Show the column dtypes that result from reading with dtype=str.
print(df_str.dtypes)
In [33]:
# Show the Python type of every individual cell after reading with dtype=str.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use map when it exists and fall back to applymap on older pandas.
print(
    (df_str.map if hasattr(df_str, 'map') else df_str.applymap)(type)
)
In [34]:
# Same read with dtype=object, for comparison with the dtype=str result.
df_object = pd.read_csv(
    'data/src/sample_header_index_dtype.csv',
    index_col=0,
    dtype=object,
)
print(df_object)
In [35]:
# Show the column dtypes that result from reading with dtype=object.
print(df_object.dtypes)
In [36]:
# Show the Python type of every individual cell after reading with dtype=object.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use map when it exists and fall back to applymap on older pandas.
print(
    (df_object.map if hasattr(df_object, 'map') else df_object.applymap)(type)
)
In [37]:
# Intentionally left commented out: a single dtype applies to every column, so
# dtype=int fails as soon as a column holds a non-numeric value such as 'ONE'.
# df_int = pd.read_csv('data/src/sample_header_index_dtype.csv',
# index_col=0, dtype=int)
# ValueError: invalid literal for int() with base 10: 'ONE'
In [38]:
# Convert after loading: astype with a dict casts only the named column ('a')
# to int and returns a new DataFrame; df_str itself is not modified.
df_str_cast = df_str.astype({'a': int})
print(df_str_cast)
In [39]:
# Show the column dtypes after casting column 'a' to int.
print(df_str_cast.dtypes)
In [40]:
# dtype as a dict sets the type per column label: 'b' and 'c' are read as
# strings, while the remaining columns keep pandas' inferred types.
df_str_col = pd.read_csv(
    'data/src/sample_header_index_dtype.csv',
    index_col=0,
    dtype={'b': str, 'c': str},
)
print(df_str_col)
In [41]:
# Show the column dtypes after the per-label dtype mapping.
print(df_str_col.dtypes)
In [42]:
# The dtype dict is keyed by integers here instead of labels.
# NOTE(review): presumably positions 2 and 3 count the index column too and so
# address the same 'b' and 'c' columns as the label-based call -- confirm.
df_str_col_num = pd.read_csv(
    'data/src/sample_header_index_dtype.csv',
    index_col=0,
    dtype={2: str, 3: str},
)
print(df_str_col_num)
In [43]:
# Show the column dtypes after the integer-keyed dtype mapping.
print(df_str_col_num.dtypes)
In [44]:
# Default missing-value handling: pandas converts empty fields and a built-in
# set of marker strings (such as 'NaN' and 'null') to NaN.
df_nan = pd.read_csv('data/src/sample_header_index_nan.csv', index_col=0)
print(df_nan)
In [45]:
# Boolean mask of which cells were parsed as missing under the defaults.
print(df_nan.isnull())
In [46]:
# na_values adds '-' to the strings treated as missing; the built-in defaults
# stay active because keep_default_na is left at its default.
df_nan_set_na = pd.read_csv(
    'data/src/sample_header_index_nan.csv',
    index_col=0,
    na_values='-',
)
print(df_nan_set_na)
In [47]:
# Boolean mask of missing cells once '-' is also treated as missing.
print(df_nan_set_na.isnull())
In [48]:
# keep_default_na=False turns off the built-in marker list, so only the strings
# given in na_values ('-', 'NaN', 'null') are parsed as missing.
df_nan_set_na_no_keep = pd.read_csv(
    'data/src/sample_header_index_nan.csv',
    index_col=0,
    na_values=['-', 'NaN', 'null'],
    keep_default_na=False,
)
print(df_nan_set_na_no_keep)
In [49]:
# Boolean mask of missing cells when only the explicit na_values apply.
print(df_nan_set_na_no_keep.isnull())
In [50]:
# na_filter=False disables missing-value detection altogether: no marker string
# or empty field is converted to NaN.
df_nan_no_filter = pd.read_csv(
    'data/src/sample_header_index_nan.csv',
    index_col=0,
    na_filter=False,
)
print(df_nan_no_filter)
In [51]:
# Boolean mask of missing cells with missing-value detection disabled.
print(df_nan_no_filter.isnull())
In [52]:
# read_table works like read_csv but uses a tab as its default delimiter,
# which suits tab-separated files.
df_tsv = pd.read_table('data/src/sample_header_index.tsv', index_col=0)
print(df_tsv)
In [53]:
# The same TSV file can be read with read_csv by setting the delimiter to a tab.
df_tsv_sep = pd.read_csv('data/src/sample_header_index.tsv', index_col=0, sep='\t')
print(df_tsv_sep)