notebook.community

Edit and run

Title: Grouping Rows In Pandas
Slug: pandas_group_rows_by
Summary: Grouping Rows In Pandas
Date: 2016-05-01 12:00
Category: Python
Tags: Data Wrangling
Authors: Chris Albon



In [5]:

    
# Import modules
import pandas as pd



In [6]:

    
# Example dataframe: one record per soldier, written row by row as
# (regiment, company, name, preTestScore, postTestScore)
column_names = ['regiment', 'company', 'name', 'preTestScore', 'postTestScore']
records = [
    ('Nighthawks', '1st', 'Miller', 4, 25),
    ('Nighthawks', '1st', 'Jacobson', 24, 94),
    ('Nighthawks', '2nd', 'Ali', 31, 57),
    ('Nighthawks', '2nd', 'Milner', 2, 62),
    ('Dragoons', '1st', 'Cooze', 3, 70),
    ('Dragoons', '1st', 'Jacon', 4, 25),
    ('Dragoons', '2nd', 'Ryaner', 24, 94),
    ('Dragoons', '2nd', 'Sone', 31, 57),
    ('Scouts', '1st', 'Sloan', 2, 62),
    ('Scouts', '1st', 'Piger', 3, 70),
    ('Scouts', '2nd', 'Riani', 2, 62),
    ('Scouts', '2nd', 'Ali', 3, 70),
]
# Transpose the rows into a column-oriented dict (column name -> list of values)
raw_data = {col: list(values) for col, values in zip(column_names, zip(*records))}
# Passing the column list keeps the column order explicit
df = pd.DataFrame(raw_data, columns=column_names)
df









    Out[6]:






  
    
      
      regiment
      company
      name
      preTestScore
      postTestScore
    
  
  
    
      0
      Nighthawks
      1st
      Miller
      4
      25
    
    
      1
      Nighthawks
      1st
      Jacobson
      24
      94
    
    
      2
      Nighthawks
      2nd
      Ali
      31
      57
    
    
      3
      Nighthawks
      2nd
      Milner
      2
      62
    
    
      4
      Dragoons
      1st
      Cooze
      3
      70
    
    
      5
      Dragoons
      1st
      Jacon
      4
      25
    
    
      6
      Dragoons
      2nd
      Ryaner
      24
      94
    
    
      7
      Dragoons
      2nd
      Sone
      31
      57
    
    
      8
      Scouts
      1st
      Sloan
      2
      62
    
    
      9
      Scouts
      1st
      Piger
      3
      70
    
    
      10
      Scouts
      2nd
      Riani
      2
      62
    
    
      11
      Scouts
      2nd
      Ali
      3
      70



In [7]:

    
# Build a grouping object: split the rows of df by regiment, then
# select the pre-test score column from each group. The object holds
# no summary by itself; call an aggregation on it to get one.
regiment_preScore = df.groupby('regiment')['preTestScore']



In [8]:

    
# Compute each regiment's mean pre-test score, then display it
mean_preScore_by_regiment = regiment_preScore.mean()
mean_preScore_by_regiment









    Out[8]:





regiment
Dragoons      15.50
Nighthawks    15.25
Scouts         2.50
Name: preTestScore, dtype: float64

	regiment	company	name	preTestScore	postTestScore
0	Nighthawks	1st	Miller	4	25
1	Nighthawks	1st	Jacobson	24	94
2	Nighthawks	2nd	Ali	31	57
3	Nighthawks	2nd	Milner	2	62
4	Dragoons	1st	Cooze	3	70
5	Dragoons	1st	Jacon	4	25
6	Dragoons	2nd	Ryaner	24	94
7	Dragoons	2nd	Sone	31	57
8	Scouts	1st	Sloan	2	62
9	Scouts	1st	Piger	3	70
10	Scouts	2nd	Riani	2	62
11	Scouts	2nd	Ali	3	70