notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np



In [2]:

    
# Unlabelled 3x4 frame holding 0..11; since no labels are passed,
# pandas supplies the default row and column labels.
df_simple = pd.DataFrame(
    data=np.arange(12).reshape((3, 4)),
)



In [3]:

    
# Show the whole frame; both axes display the default integer labels.
print(df_simple)









    



   0  1   2   3
0  0  1   2   3
1  4  5   6   7
2  8  9  10  11



In [4]:

    
# .values exposes the frame's data as a 2-D array without row/column labels.
print(df_simple.values)









    



[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]



In [5]:

    
# Confirm the type of .values: it is a NumPy ndarray.
print(type(df_simple.values))









    



<class 'numpy.ndarray'>



In [6]:

    
# Column labels; because none were given they form a RangeIndex (0 up to 4).
print(df_simple.columns)









    



RangeIndex(start=0, stop=4, step=1)



In [7]:

    
# The columns attribute is an index object (here a RangeIndex), not a list.
print(type(df_simple.columns))









    



<class 'pandas.core.indexes.range.RangeIndex'>



In [8]:

    
# Row labels; the default is a RangeIndex covering the three rows.
print(df_simple.index)









    



RangeIndex(start=0, stop=3, step=1)



In [9]:

    
# The default row labels use the same RangeIndex type as the default columns.
print(type(df_simple.index))









    



<class 'pandas.core.indexes.range.RangeIndex'>



In [10]:

    
# Wrap the labels in list() to get a plain Python list.
print(list(df_simple.columns))









    



[0, 1, 2, 3]



In [11]:

    
# Confirm that list() produced a built-in list.
print(type(list(df_simple.columns)))









    



<class 'list'>



In [12]:

    
# tolist() on the index object gives the same result as list(df_simple.columns).
print(df_simple.columns.tolist())









    



[0, 1, 2, 3]



In [13]:

    
# tolist() also returns a built-in list.
print(type(df_simple.columns.tolist()))









    



<class 'list'>



In [14]:

    
# Same 3x4 block of 0..11 as before, this time with explicit string
# labels on both axes.
df = pd.DataFrame(
    data=np.arange(12).reshape((3, 4)),
    index=['row_0', 'row_1', 'row_2'],
    columns=['col_0', 'col_1', 'col_2', 'col_3'],
)



In [15]:

    
# Show the labelled frame: headers and row labels are now the given strings.
print(df)









    



       col_0  col_1  col_2  col_3
row_0      0      1      2      3
row_1      4      5      6      7
row_2      8      9     10     11



In [16]:

    
# String column labels are held in a generic Index with dtype 'object'.
print(df.columns)









    



Index(['col_0', 'col_1', 'col_2', 'col_3'], dtype='object')



In [17]:

    
# With string labels the type is the base Index class rather than RangeIndex.
print(type(df.columns))









    



<class 'pandas.core.indexes.base.Index'>



In [18]:

    
# String row labels are likewise held in an Index with dtype 'object'.
print(df.index)









    



Index(['row_0', 'row_1', 'row_2'], dtype='object')



In [19]:

    
# The row labels use the same base Index class as the string column labels.
print(type(df.index))









    



<class 'pandas.core.indexes.base.Index'>



In [20]:

    
# Column names as a plain list of strings.
print(df.columns.tolist())









    



['col_0', 'col_1', 'col_2', 'col_3']



In [21]:

    
# tolist() returns a built-in list for string labels as well.
print(type(df.columns.tolist()))









    



<class 'list'>



In [22]:

    
# An individual label can be read by its position in the index object.
print(df.columns[0])









    



col_0



In [23]:

    
# Intentionally left commented out: assigning to a single element of an
# Index fails, so one label cannot be changed in place this way.
# df.columns[0] = 'Col_0'
# TypeError: Index does not support mutable operations



In [24]:

    
# Rename every column at once by assigning a full list of labels to .columns.
df.columns = ['Col_0', 'Col_1', 'Col_2', 'Col_3']



In [25]:

    
# Rename every row at once by assigning a full list of labels to .index.
df.index = ['Row_0', 'Row_1', 'Row_2']



In [26]:

    
# Verify the relabelling: the labels are now capitalised, the data unchanged.
print(df)









    



       Col_0  Col_1  Col_2  Col_3
Row_0      0      1      2      3
Row_1      4      5      6      7
Row_2      8      9     10     11



In [27]:

    
# Intentionally left commented out: unlike .columns and .index, the .values
# attribute cannot be assigned to.
# df.values = np.arange(12).reshape(3, 4) * 10
# AttributeError: can't set attribute



In [28]:

    
# Bracket access with a column label returns that column.
print(df['Col_1'])









    



Row_0    1
Row_1    5
Row_2    9
Name: Col_1, dtype: int64



In [29]:

    
# A single selected column is a pandas Series.
print(type(df['Col_1']))









    



<class 'pandas.core.series.Series'>



In [30]:

    
# Attribute-style access returns the same column as df['Col_1'].
print(df.Col_1)









    



Row_0    1
Row_1    5
Row_2    9
Name: Col_1, dtype: int64



In [31]:

    
# .loc with a full row slice and one column label selects the same column.
print(df.loc[:, 'Col_1'])









    



Row_0    1
Row_1    5
Row_2    9
Name: Col_1, dtype: int64



In [32]:

    
# .loc with one row label and a full column slice selects that row.
print(df.loc['Row_1', :])









    



Col_0    4
Col_1    5
Col_2    6
Col_3    7
Name: Row_1, dtype: int64



In [33]:

    
# A single selected row is also a Series.
print(type(df.loc['Row_1', :]))









    



<class 'pandas.core.series.Series'>



In [34]:

    
# The column slice can be omitted: a lone row label gives the same row.
print(df.loc['Row_1'])









    



Col_0    4
Col_1    5
Col_2    6
Col_3    7
Name: Row_1, dtype: int64



In [35]:

    
# Lists of labels on both axes select a sub-table of those rows and columns.
print(df.loc[['Row_0', 'Row_2'], ['Col_1', 'Col_3']])









    



       Col_1  Col_3
Row_0      1      3
Row_2      9     11



In [36]:

    
# Selecting with lists of labels yields a DataFrame.
print(type(df.loc[['Row_0', 'Row_2'], ['Col_1', 'Col_3']]))









    



<class 'pandas.core.frame.DataFrame'>



In [37]:

    
# One row label plus one column label addresses a single element.
print(df.loc['Row_0', 'Col_1'])



In [38]:

    
# The selected element is a NumPy scalar (numpy.int64), not a Series.
print(type(df.loc['Row_0', 'Col_1']))









    



<class 'numpy.int64'>



In [39]:

    
# .at addresses one element by its row label and column label.
print(df.at['Row_0', 'Col_1'])



In [40]:

    
# Intentionally left commented out: .at does not accept a slice as a key,
# so it cannot select a whole column the way .loc can.
# print(df.at[:, 'Col_1'])
# TypeError: 'slice(None, None, None)' is an invalid key



In [41]:

    
# .iloc selects by integer position; these positions pick the same
# sub-table as df.loc[['Row_0', 'Row_2'], ['Col_1', 'Col_3']].
print(df.iloc[[0, 2], [1, 3]])









    



       Col_1  Col_3
Row_0      1      3
Row_2      9     11



In [42]:

    
# .iat addresses one element by integer row and column position.
print(df.iat[0, 1])



In [43]:

    
# query() keeps only the rows for which the condition string holds.
print(df.query('Col_0 > 2'))









    



       Col_0  Col_1  Col_2  Col_3
Row_1      4      5      6      7
Row_2      8      9     10     11



In [44]:

    
# Overwrite one column in place; the list supplies one value per row.
df.loc[:, 'Col_1'] = [10, 50, 90]



In [45]:

    
# Check the assignment: only Col_1 has changed.
print(df)









    



       Col_0  Col_1  Col_2  Col_3
Row_0      0     10      2      3
Row_1      4     50      6      7
Row_2      8     90     10     11



In [46]:

    
# Overwrite every element at once; the array has the same 3x4 shape as df.
df.loc[:] = np.arange(12).reshape(3, 4) * 100



In [47]:

    
# Check the assignment: all values are replaced, the labels are untouched.
print(df)









    



       Col_0  Col_1  Col_2  Col_3
Row_0      0    100    200    300
Row_1    400    500    600    700
Row_2    800    900   1000   1100



In [48]:

    
# Intentionally left commented out: four values cannot be assigned to a
# column of a three-row frame.
# df.loc[:, 'Col_1'] = [10, 50, 90, 130]
# ValueError: Length of values does not match length of index



In [49]:

    
# Intentionally left commented out: a 4x4 array does not fit the 3x4 frame.
# df.loc[:] = np.arange(16).reshape(4, 4) * 100
# ValueError: cannot set using a slice indexer with a different length than the value



In [50]:

    
# Load the sample table from CSV; the first CSV column becomes the row index.
df_multi = pd.read_csv(
    'data/src/sample_pandas_normal.csv',
    index_col=0,
)



In [51]:

    
# Show the loaded table; the 'name' column serves as the row index.
print(df_multi)









    



         age state  point
name                     
Alice     24    NY     64
Bob       42    CA     92
Charlie   18    CA     70
Dave      68    TX     70
Ellen     24    CA     88
Frank     30    NY     57



In [52]:

    
# Each column has its own dtype: int64 for age and point, object for state.
print(df_multi.dtypes)









    



age       int64
state    object
point     int64
dtype: object