In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from pandas import DataFrame, Series
In [26]:
# Build the small demo frame used throughout the notebook: two categorical
# key columns plus two numeric columns of standard-normal draws.
# A dedicated, seeded generator keeps the numbers identical across
# "Restart & Run All" without touching NumPy's global random state.
rng = np.random.RandomState(12345)
df = DataFrame({'key1': ['a', 'a', 'b', 'b', 'a'],
                'key2': ['one', 'two', 'one', 'two', 'one'],
                'data1': rng.randn(5),
                'data2': rng.randn(5)})
df
Out[26]:
In [27]:
# Bucket the rows by 'key1', then show the 90th percentile of 'data1'
# inside each bucket.
grouped = df.groupby(by='key1')
grouped['data1'].quantile(q=0.9)
Out[27]:
In [29]:
def peak_to_peak(arr):
    """Return the spread of ``arr``: its maximum minus its minimum.

    ``arr`` must provide ``max()`` and ``min()`` methods (for example a
    pandas Series or a NumPy array).
    """
    return arr.max() - arr.min()
# Apply the custom aggregation per group.  The numeric columns are selected
# explicitly because 'key2' holds strings, and max() - min() is undefined for
# strings (recent pandas raises instead of silently skipping that column).
grouped[['data1', 'data2']].agg(peak_to_peak)
Out[29]:
In [31]:
# Per-'key1' means of the numeric columns, renamed to 'mean_data1' and
# 'mean_data2' so they can sit beside the original columns after a merge.
# The numeric columns are picked explicitly: 'key2' holds strings and cannot
# be averaged, which recent pandas reports as an error instead of dropping it.
k1_means = df.groupby('key1')[['data1', 'data2']].mean().add_prefix('mean_')
k1_means
Out[31]:
In [32]:
# Attach the per-group means to every row: match each row's 'key1' value
# against the index of k1_means.
df.merge(k1_means, left_on='key1', right_index=True)
Out[32]:
In [ ]:
In [30]:
# Descriptive statistics computed separately for each group.
# NOTE(review): relies on `grouped` still being the groupby object assigned in
# an earlier cell — confirm when running cells out of order.
grouped.describe()
Out[30]:
In [5]:
# Whole-column reductions, no grouping involved.  Both values are returned as
# one tuple so the cell displays the mean of 'data1' as well as the sum of
# 'data2'; written as two statements, only the last value would be shown.
df['data1'].mean(), df['data2'].sum()
Out[5]:
In [6]:
# Group a single column ('data1') using the values of another Series
# ('key1') as the keys; displaying the result shows the GroupBy object
# itself, nothing has been aggregated yet.
grouped = df['data1'].groupby(by=df['key1'])
grouped
Out[6]:
In [7]:
# Mean of each group held by `grouped`.
grouped.mean()
Out[7]:
In [8]:
# Two key Series at once: the result is indexed by every (key1, key2)
# combination that occurs in the data.
means = (
    df['data1']
    .groupby(by=[df['key1'], df['key2']])
    .mean()
)
means
Out[8]:
In [9]:
# Pivot the innermost index level into columns.
means.unstack(level=-1)
Out[9]:
In [10]:
# Group keys do not have to come from the frame: here two standalone arrays,
# one entry per row of df, serve as the keys.
states = np.array(['Ohio', 'California', 'California', 'Ohio', 'Ohio'])
years = np.array([2005, 2005, 2006, 2005, 2006])
df['data1'].groupby(by=[states, years]).mean()
Out[10]:
In [11]:
# Group by a column name and average the numeric columns.  'key2' holds
# strings and cannot be averaged, so the numeric columns are selected
# explicitly (recent pandas raises rather than silently dropping 'key2').
df.groupby('key1')[['data1', 'data2']].mean()
Out[11]:
In [12]:
# Group on both key columns by name; only the numeric columns are left to
# average.
df.groupby(by=['key1', 'key2']).mean()
Out[12]:
In [13]:
# Number of rows in each (key1, key2) group.
df.groupby(by=['key1', 'key2']).size()
Out[13]:
In [14]:
# Iterating over a GroupBy yields (group key, sub-frame) pairs.
# print() is called with a single argument so the output is the same under
# both Python 2 and Python 3.
for name, group in df.groupby('key1'):
    print(name)
    print(group)
In [15]:
# With several grouping keys, each group key is a tuple that can be unpacked
# directly in the loop header.
for (k1, k2), group in df.groupby(['key1', 'key2']):
    # Format into one string first: print(k1, k2) would show a tuple on
    # Python 2, whereas this prints "k1 k2" on either version.
    print('{0} {1}'.format(k1, k2))
    print(group)
In [16]:
# Materialise the groups as a mapping from group key to sub-frame, then
# look up the rows whose 'key1' is 'b'.
pieces = {name: group for name, group in df.groupby('key1')}
pieces['b']
Out[16]:
In [17]:
# Data type of each column in df.
df.dtypes
Out[17]:
In [18]:
# Group the *columns* (axis=1) by their dtype and collect the resulting
# sub-frames in a dict keyed by dtype.
# NOTE(review): newer pandas releases deprecate axis=1 for groupby — confirm
# against the installed version before upgrading.
grouped = df.groupby(by=df.dtypes, axis=1)
{dtype: block for dtype, block in grouped}
Out[18]:
In [22]:
# Select the columns to aggregate after grouping; a list selection keeps the
# result a DataFrame with the columns in the requested order.
df.groupby(by=['key1', 'key2'])[['data2', 'data1']].mean()
Out[22]:
In [23]:
# Selecting a single column name (not a list) from the grouped frame gives a
# grouped Series; displaying it shows the GroupBy object itself.
s_grouped = df.groupby(by=['key1', 'key2'])['data2']
s_grouped
Out[23]:
In [24]:
# Mean of each group held by `s_grouped`.
s_grouped.mean()
Out[24]:
In [ ]: