In [1]:
import math
import sys
import warnings

import numpy as np
import pandas as pd
In [2]:
# Show the version string of the Python interpreter running this kernel.
sys.version
Out[2]:
In [3]:
# Obtain a NaN three ways: the float constructor, the math module, and NumPy.
nan1 = float('nan')
nan2 = math.nan
nan3 = np.nan
# Display the three values side by side.
nan1, nan2, nan3
Out[3]:
In [4]:
# pandas does not expose a top-level `nan` attribute, so evaluating `pd.nan`
# directly raises AttributeError and stops a Restart & Run All. Probe for the
# attribute instead so the cell shows the same fact without raising.
hasattr(pd, 'nan')
In [5]:
# Object identities of the three NaN values (shows whether they are the same object).
list(map(id, (nan1, nan2, nan3)))
Out[5]:
In [6]:
# Check each value with the standard-library NaN test.
list(map(math.isnan, (nan1, nan2, nan3)))
Out[6]:
In [7]:
# Same check using NumPy's NaN test.
list(map(np.isnan, (nan1, nan2, nan3)))
Out[7]:
In [8]:
# IEEE 754: NaN compares unequal to everything, including itself.
nan1 == nan1
Out[8]:
In [9]:
# Series a: four floats, no NaN.
a = pd.Series([0., 0., 0., 1.])
a
Out[9]:
In [10]:
# Series b: same as a but with one NaN in the third position.
b = pd.Series([0., 0., float('nan'), 1.])
b
Out[10]:
In [11]:
# Series c: four NaN values and nothing else.
c = pd.Series([float('nan'), float('nan'), float('nan'), float('nan'), ])
c
Out[11]:
In [12]:
def show_dir(x, substrings=('nan', 'mean', 'tile')):
    """Print the attribute names of ``x`` that contain each given substring.

    For every entry in ``substrings`` the names returned by ``dir(x)`` are
    scanned case-insensitively; each matching name is printed on its own
    line, and a blank line is printed after each substring's group (even
    when the group is empty).

    Parameters
    ----------
    x : object
        Any object accepted by ``dir()`` (module, class, instance, ...).
    substrings : iterable of str, optional
        Substrings to search for. Defaults to ``('nan', 'mean', 'tile')``.

    Returns
    -------
    None
        Output goes to stdout only.
    """
    for s in substrings:
        # Names are lower-cased below, so lower-case the needle as well;
        # otherwise a needle containing capitals could never match.
        needle = s.lower()
        for name in dir(x):
            if needle in name.lower():
                print(name)
        # Blank line separates the groups of matches.
        print()
In [13]:
# List the attributes of Series a whose names mention 'nan', 'mean' or 'tile'.
show_dir(a)
In [14]:
# Same search over the top-level pandas namespace.
show_dir(pd)
In [15]:
# np.NAN and np.NaN were aliases of np.nan that NumPy 2.0 removed; accessing
# them there raises AttributeError. Fall back to np.nan when an alias is
# missing so the identity check runs on any NumPy version.
getattr(np, 'NAN', np.nan) is getattr(np, 'NaN', np.nan)
Out[15]:
In [16]:
# Mean of the NaN-free series via the pandas method.
a.mean()
Out[16]:
In [17]:
# pandas Series has no `nanmean` method, so calling `a.nanmean()` raises
# AttributeError and halts a Restart & Run All. Probe for the attribute
# instead to show its absence without raising.
hasattr(a, 'nanmean')
In [18]:
# NumPy's plain and NaN-ignoring mean applied to the NaN-free series.
np.mean(a), np.nanmean(a)
Out[18]:
In [19]:
# pandas mean of the series that contains one NaN.
b.mean()
Out[19]:
In [20]:
# NumPy's plain and NaN-ignoring mean on the same series, for comparison.
np.mean(b), np.nanmean(b)
Out[20]:
In [21]:
# pandas mean of the all-NaN series.
c.mean()
Out[21]:
In [22]:
# NumPy plain mean of the all-NaN series.
np.mean(c)
Out[22]:
In [23]:
# NumPy NaN-ignoring mean of the all-NaN series (nothing is left to average).
np.nanmean(c)
Out[23]:
In [24]:
# pandas sums of the three series.
a.sum(), b.sum(), c.sum()
Out[24]:
In [25]:
# pandas 0.1 quantile (10th percentile) of the three series.
a.quantile(.1), b.quantile(.1), c.quantile(.1)
Out[25]:
In [26]:
# Median of a column that contains a NaN. The pasted doctest prompts (">>>")
# are removed because they are only valid under IPython's input transformer,
# and the value is left as the cell's last expression instead of printed.
df = pd.DataFrame([1, 2, 3, np.nan], columns=['x'])
df['x'].quantile(0.5)
In [27]:
# NumPy plain sum of each series.
np.sum(a), np.sum(b), np.sum(c)
Out[27]:
In [28]:
# NumPy NaN-ignoring sum of each series.
np.nansum(a), np.nansum(b), np.nansum(c)
Out[28]:
In [29]:
# Print the np.percentile docstring for reference.
help(np.percentile)
In [30]:
# Apply the plain and the NaN-ignoring percentile to series a and b. For each
# combination, print a header line followed by the 10th-percentile result.
# The pairs are generated in the same order as a nested loop (series outer,
# function inner).
for x, f in (
    (series, func)
    for series in (a, b)
    for func in (np.percentile, np.nanpercentile)
):
    print(f'for {f}({x})')
    y = f(x, q=10)
    print(y)
In [31]:
# Percentile (0-100 scale) used by every percentile call in the cells below.
q = 10
In [32]:
# Plain percentile: NaN-free series.
np.percentile(a, q)
Out[32]:
In [33]:
# Plain percentile: series with one NaN.
np.percentile(b, q)
Out[33]:
In [34]:
# Plain percentile: all-NaN series.
np.percentile(c, q)
Out[34]:
In [35]:
# NaN-ignoring percentile: NaN-free series.
np.nanpercentile(a, q)
Out[35]:
In [36]:
# NaN-ignoring percentile: series with one NaN.
np.nanpercentile(b, q)
Out[36]:
In [37]:
# NaN-ignoring percentile: all-NaN series, so no values remain after dropping NaN.
np.nanpercentile(c, q)
Out[37]:
In [38]:
def my_nanpercentile(*args, **kwargs):
    """Call ``np.nanpercentile`` with its RuntimeWarnings silenced.

    ``np.nanpercentile`` already returns NaN for an all-NaN input, but it
    also emits a ``RuntimeWarning``. Warnings are not raised as exceptions
    under the default filters, so wrapping the call in
    ``try/except RuntimeWarning`` never triggers. The warning is suppressed
    locally instead, and the global warning filters are restored on exit.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to ``np.nanpercentile``.

    Returns
    -------
    scalar or ndarray
        Whatever ``np.nanpercentile`` returns; NaN where a slice holds only NaN.
    """
    with warnings.catch_warnings():
        # The filter is installed inside the context manager, so it is
        # undone as soon as the call returns.
        warnings.simplefilter("ignore", category=RuntimeWarning)
        return np.nanpercentile(*args, **kwargs)
In [39]:
# Exercise my_nanpercentile on the NaN-free series...
my_nanpercentile(a, q)
Out[39]:
In [40]:
# ...on the series with one NaN...
my_nanpercentile(b, q)
Out[40]:
In [41]:
# ...and on the all-NaN series.
my_nanpercentile(c, q)
Out[41]:
In [42]:
def my2_nanpercentile(*args, **kwargs):
    """NaN-ignoring percentile that short-circuits when the input is all NaN.

    When every element of the data is NaN (or the data is empty), NaN is
    returned without calling ``np.nanpercentile``. Otherwise the call is
    forwarded unchanged.

    The all-NaN test is vectorised with ``np.isnan``, so it accepts arrays
    of any dimensionality. The data may be passed positionally or as the
    keyword ``a``, matching ``np.nanpercentile``'s own signature.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to ``np.nanpercentile``; the data is the first positional
        argument or the keyword ``a``.

    Returns
    -------
    scalar or ndarray
        A scalar ``np.nan`` when the whole input is NaN (regardless of
        ``axis`` or the shape of ``q``); otherwise the result of
        ``np.nanpercentile``.
    """
    if args:
        x = args[0]
    elif "a" in kwargs:
        x = kwargs["a"]
    else:
        # No data supplied: let NumPy report the missing argument itself.
        return np.nanpercentile(*args, **kwargs)

    # .all() on an empty array is True, matching all() on an empty iterable.
    if np.isnan(np.asanyarray(x)).all():
        return np.nan
    return np.nanpercentile(*args, **kwargs)
In [43]:
# Exercise my2_nanpercentile on the NaN-free series...
my2_nanpercentile(a, q)
Out[43]:
In [44]:
# ...on the series with one NaN...
my2_nanpercentile(b, q)
Out[44]:
In [45]:
# ...and on the all-NaN series.
my2_nanpercentile(c, q)
Out[45]:
# Print the np.nanpercentile docstring (this cell carries no execution count in the export).
help(np.nanpercentile)
In [46]:
# The remaining cells evaluate the NumPy reductions one series at a time.
# Plain mean: a, then b, then c.
np.mean(a)
Out[46]:
In [47]:
np.mean(b)
Out[47]:
In [48]:
np.mean(c)
Out[48]:
In [49]:
# NaN-ignoring mean: a, then b, then c.
np.nanmean(a)
Out[49]:
In [50]:
np.nanmean(b)
Out[50]:
In [51]:
np.nanmean(c)
Out[51]:
In [52]:
# Plain sum: a, then b, then c.
np.sum(a)
Out[52]:
In [53]:
np.sum(b)
Out[53]:
In [54]:
np.sum(c)
Out[54]:
In [55]:
# NaN-ignoring sum: a, then b, then c.
np.nansum(a)
Out[55]:
In [56]:
np.nansum(b)
Out[56]:
In [57]:
np.nansum(c)
Out[57]: