notebook.community

Edit and run



In [1]:

    
import pandas as pd



In [2]:

    
import numpy as np



In [4]:

    
from pandas import DataFrame, Series



In [6]:

    
# Rows 'a'..'d' by columns 'one'/'two'; the NaN entries are deliberate gaps
# so the NaN handling of the reductions in later cells is visible.
df = DataFrame(
    data=[[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    columns=['one', 'two'],
    index=['a', 'b', 'c', 'd'],
)



In [7]:

    
# Echo the frame as the cell result (its rendered table is not part of this export).
df



In [8]:

    
# Column-wise sums (default axis); the recorded output shows NaN entries being skipped.
df.sum()









    Out[8]:





one    9.25
two   -5.80
dtype: float64



In [9]:

    
# Row-wise sums. In the recorded output NaN is skipped within a row, while the
# all-NaN row 'c' comes back as NaN.
# NOTE(review): the all-NaN result looks version-dependent (newer pandas is
# documented to return 0 unless min_count is set) - confirm before relying on it.
df.sum(axis=1)









    Out[9]:





a    1.40
b    2.60
c     NaN
d   -0.55
dtype: float64



In [10]:

    
# Row-wise means with NaN skipping disabled: any row containing a NaN yields NaN
# (rows 'a' and 'c' in the recorded output).
df.mean(axis=1, skipna=False)









    Out[10]:





a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64



In [11]:

    
# Index label at which each column attains its maximum ('b' for one, 'd' for two).
df.idxmax()









    Out[11]:





one    b
two    d
dtype: object



In [12]:

    
# Running totals accumulated down each column (default axis).
# NOTE(review): the rendered table is missing from this export - presumably NaN
# cells stay NaN without resetting the running total; verify by re-running.
df.cumsum()



In [13]:

    
# Summary statistics per column (count, mean, std, min, quartiles, max).
# The recorded run emitted a NumPy RuntimeWarning and NaN for the 25%/50%/75% rows.
# NOTE(review): presumably caused by the NaN cells under the NumPy/pandas versions
# of this Python 3.5 environment; newer releases are expected to compute the
# quartiles over the non-null values - confirm by re-running.
df.describe()









    



/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/numpy/lib/function_base.py:3834: RuntimeWarning: Invalid value encountered in percentile
  RuntimeWarning)






    Out[13]:






  
    
      
      one
      two
    
  
  
    
      count
      3.000000
      2.000000
    
    
      mean
      3.083333
      -2.900000
    
    
      std
      3.493685
      2.262742
    
    
      min
      0.750000
      -4.500000
    
    
      25%
      NaN
      NaN
    
    
      50%
      NaN
      NaN
    
    
      75%
      NaN
      NaN
    
    
      max
      7.100000
      -1.300000



In [17]:

    
# Nine single-letter labels with repeats, used to demonstrate unique values,
# frequency counts and membership tests.
obj = Series(
    data=['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'],
)



In [18]:

    
# Distinct values of obj, returned as an array (not sorted).
uniques = obj.unique()



In [19]:

    
# Show the distinct values; the recorded output lists them in order of first appearance.
uniques









    Out[19]:





array(['c', 'a', 'd', 'b'], dtype=object)



In [20]:

    
# Frequency of each distinct value, most frequent first.
obj.value_counts()









    Out[20]:





a    3
c    3
b    2
d    1
dtype: int64



In [21]:

    
# Tally each distinct value without ordering the result by frequency.
# Uses the Series method rather than the top-level pd.value_counts helper,
# which is deprecated in recent pandas releases.
obj.value_counts(sort=False)









    Out[21]:





c    3
d    1
b    2
a    3
dtype: int64



In [22]:

    
# Boolean Series: True where the element of obj is 'b' or 'c'.
mask = obj.isin(['b', 'c'])



In [23]:

    
# Show the boolean membership mask.
mask









    Out[23]:





0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool



In [24]:

    
# Boolean indexing: keep only the elements where mask is True; the original
# index labels are preserved in the result.
obj[mask]









    Out[24]:





0    c
5    b
6    b
7    c
8    c
dtype: object



In [25]:

    
# Three integer columns (Qu1..Qu3) of five observations each.
data = DataFrame({
    'Qu1': [1, 3, 4, 3, 4],
    'Qu2': [2, 3, 1, 2, 3],
    'Qu3': [1, 5, 2, 4, 4],
})



In [26]:

    
# Echo the frame as the cell result (its rendered table is not part of this export).
data



In [27]:

    
# Per-column frequency table: value_counts runs on each column and the results
# are aligned on the union of observed values; a value absent from a column
# comes back as NaN and is replaced with 0.
# pd.Series.value_counts replaces the top-level pd.value_counts helper,
# which is deprecated in recent pandas releases.
result = data.apply(pd.Series.value_counts).fillna(0)



In [28]:

    
# Echo the per-column frequency table.
# NOTE(review): the tab-separated Qu1/Qu2/Qu3 table near the end of this export
# appears to be this cell's output (rows = observed values, 0.0 = value absent) - confirm.
result



In [ ]:

	Qu1	Qu2	Qu3
1	1.0	1.0	1.0
2	0.0	2.0	1.0
3	2.0	2.0	0.0
4	2.0	0.0	2.0
5	0.0	0.0	1.0

	one	two
count	3.000000	2.000000
mean	3.083333	-2.900000
std	3.493685	2.262742
min	0.750000	-4.500000
25%	NaN	NaN
50%	NaN	NaN
75%	NaN	NaN
max	7.100000	-1.300000