In [8]:

    
import pandas as pd
import numpy as np #didn't use
import scipy #didn't use
import matplotlib.pyplot as plt



In [2]:

    
data = pd.read_csv("/Users/abigaildobyns/Documents/doing_data_science-master/dds_datasets/dds_ch2_nyt/nyt1.csv")#load data set
data #let's look at it









    Out[2]:






  
    
      
      Age
      Gender
      Impressions
      Clicks
      Signed_In
    
  
  
    
      0 
       36
       0
        3
       0
       1
    
    
      1 
       73
       1
        3
       0
       1
    
    
      2 
       30
       0
        3
       0
       1
    
    
      3 
       49
       1
        3
       0
       1
    
    
      4 
       47
       1
       11
       0
       1
    
    
      5 
       47
       0
       11
       1
       1
    
    
      6 
        0
       0
        7
       1
       0
    
    
      7 
       46
       0
        5
       0
       1
    
    
      8 
       16
       0
        3
       0
       1
    
    
      9 
       52
       0
        4
       0
       1
    
    
      10
        0
       0
        8
       1
       0
    
    
      11
       21
       0
        3
       0
       1
    
    
      12
        0
       0
        4
       0
       0
    
    
      13
       57
       0
        6
       0
       1
    
    
      14
       31
       0
        5
       0
       1
    
    
      15
        0
       0
        6
       0
       0
    
    
      16
       40
       1
        3
       0
       1
    
    
      17
       31
       1
        5
       0
       1
    
    
      18
       38
       0
        4
       0
       1
    
    
      19
        0
       0
        5
       0
       0
    
    
      20
       59
       1
        4
       0
       1
    
    
      21
       61
       0
        6
       0
       1
    
    
      22
       48
       0
        7
       0
       1
    
    
      23
       29
       1
        2
       0
       1
    
    
      24
        0
       0
        4
       0
       0
    
    
      25
       19
       1
        4
       0
       1
    
    
      26
       19
       0
        3
       0
       1
    
    
      27
       48
       1
        9
       0
       1
    
    
      28
       48
       1
        4
       0
       1
    
    
      29
       21
       1
        5
       0
       1
    
    
      30
       23
       0
        4
       0
       1
    
    
      31
       66
       1
        3
       0
       1
    
    
      32
       23
       1
        7
       0
       1
    
    
      33
       44
       1
        4
       0
       1
    
    
      34
       32
       0
        2
       0
       1
    
    
      35
       22
       1
        2
       0
       1
    
    
      36
       30
       1
        8
       0
       1
    
    
      37
       74
       0
        2
       0
       1
    
    
      38
       65
       1
        5
       0
       1
    
    
      39
        0
       0
        7
       2
       0
    
    
      40
       32
       1
        9
       0
       1
    
    
      41
        0
       0
        4
       0
       0
    
    
      42
       61
       0
        2
       0
       1
    
    
      43
       63
       1
        4
       0
       1
    
    
      44
       36
       1
        1
       0
       1
    
    
      45
       35
       0
        7
       0
       1
    
    
      46
        0
       0
        3
       0
       0
    
    
      47
        0
       0
        7
       0
       0
    
    
      48
       73
       0
        4
       0
       1
    
    
      49
        0
       0
        5
       0
       0
    
    
      50
        0
       0
        5
       0
       0
    
    
      51
       33
       0
        4
       0
       1
    
    
      52
       34
       0
        5
       0
       1
    
    
      53
       48
       1
        1
       0
       1
    
    
      54
       43
       1
        4
       0
       1
    
    
      55
       59
       1
        2
       0
       1
    
    
      56
       34
       0
        3
       0
       1
    
    
      57
       22
       0
        5
       0
       1
    
    
      58
       18
       1
        1
       0
       1
    
    
      59
        0
       0
        4
       0
       0
    
    
      
      ...
      ...
      ...
      ...
      ...
    
  

458441 rows × 5 columns

1. Create a new variable, age_group, that categorizes users as "<18", "18-24", "25-34", "35-44", "45-54", "55-64", and "65+".



In [7]:

    
age_group = pd.Series(index = data.index) #create a series to store the new age group in

for index, row in data['Age'].iteritems(): #iterate over the rows, must explicitly call index too
    if row < 18:
        age_group[index] = 1
    if 18 <= row <= 24:
        age_group[index] = 2
    if 25 <= row <= 34:
        age_group[index] = 3
    if 35 <= row <= 44:
        age_group[index] = 4
    if 45 <= row <= 54:
        age_group[index] = 5
    if 55 <= row <= 64:
        age_group[index] = 6
    if row >= 65:
        age_group[index] = 7

data['age_group'] = age_group #add new categories to the data frame
data #check that it worked









    Out[7]:






  
    
      
      Age
      Gender
      Impressions
      Clicks
      Signed_In
      age_group
    
  
  
    
      0 
       36
       0
        3
       0
       1
       4
    
    
      1 
       73
       1
        3
       0
       1
       7
    
    
      2 
       30
       0
        3
       0
       1
       3
    
    
      3 
       49
       1
        3
       0
       1
       5
    
    
      4 
       47
       1
       11
       0
       1
       5
    
    
      5 
       47
       0
       11
       1
       1
       5
    
    
      6 
        0
       0
        7
       1
       0
       1
    
    
      7 
       46
       0
        5
       0
       1
       5
    
    
      8 
       16
       0
        3
       0
       1
       1
    
    
      9 
       52
       0
        4
       0
       1
       5
    
    
      10
        0
       0
        8
       1
       0
       1
    
    
      11
       21
       0
        3
       0
       1
       2
    
    
      12
        0
       0
        4
       0
       0
       1
    
    
      13
       57
       0
        6
       0
       1
       6
    
    
      14
       31
       0
        5
       0
       1
       3
    
    
      15
        0
       0
        6
       0
       0
       1
    
    
      16
       40
       1
        3
       0
       1
       4
    
    
      17
       31
       1
        5
       0
       1
       3
    
    
      18
       38
       0
        4
       0
       1
       4
    
    
      19
        0
       0
        5
       0
       0
       1
    
    
      20
       59
       1
        4
       0
       1
       6
    
    
      21
       61
       0
        6
       0
       1
       6
    
    
      22
       48
       0
        7
       0
       1
       5
    
    
      23
       29
       1
        2
       0
       1
       3
    
    
      24
        0
       0
        4
       0
       0
       1
    
    
      25
       19
       1
        4
       0
       1
       2
    
    
      26
       19
       0
        3
       0
       1
       2
    
    
      27
       48
       1
        9
       0
       1
       5
    
    
      28
       48
       1
        4
       0
       1
       5
    
    
      29
       21
       1
        5
       0
       1
       2
    
    
      30
       23
       0
        4
       0
       1
       2
    
    
      31
       66
       1
        3
       0
       1
       7
    
    
      32
       23
       1
        7
       0
       1
       2
    
    
      33
       44
       1
        4
       0
       1
       4
    
    
      34
       32
       0
        2
       0
       1
       3
    
    
      35
       22
       1
        2
       0
       1
       2
    
    
      36
       30
       1
        8
       0
       1
       3
    
    
      37
       74
       0
        2
       0
       1
       7
    
    
      38
       65
       1
        5
       0
       1
       7
    
    
      39
        0
       0
        7
       2
       0
       1
    
    
      40
       32
       1
        9
       0
       1
       3
    
    
      41
        0
       0
        4
       0
       0
       1
    
    
      42
       61
       0
        2
       0
       1
       6
    
    
      43
       63
       1
        4
       0
       1
       6
    
    
      44
       36
       1
        1
       0
       1
       4
    
    
      45
       35
       0
        7
       0
       1
       4
    
    
      46
        0
       0
        3
       0
       0
       1
    
    
      47
        0
       0
        7
       0
       0
       1
    
    
      48
       73
       0
        4
       0
       1
       7
    
    
      49
        0
       0
        5
       0
       0
       1
    
    
      50
        0
       0
        5
       0
       0
       1
    
    
      51
       33
       0
        4
       0
       1
       3
    
    
      52
       34
       0
        5
       0
       1
       3
    
    
      53
       48
       1
        1
       0
       1
       5
    
    
      54
       43
       1
        4
       0
       1
       4
    
    
      55
       59
       1
        2
       0
       1
       6
    
    
      56
       34
       0
        3
       0
       1
       3
    
    
      57
       22
       0
        5
       0
       1
       2
    
    
      58
       18
       1
        1
       0
       1
       2
    
    
      59
        0
       0
        4
       0
       0
       1
    
    
      
      ...
      ...
      ...
      ...
      ...
      ...
    
  

458441 rows × 6 columns

Plot the distributions of number impressions and clickthrough- rate (CTR=# clicks/# impressions) for these six age categories.



In [9]:

    
data['CTR'] = data['Clicks']/data['Impressions'] #make CTR column



In [21]:

    
data.boxplot(column= 'Impressions', by = 'age_group')
plt.show()



In [22]:

    
data.boxplot(column= 'CTR', by = 'age_group')
plt.show()

Define a new variable to segment or categorize users based on their click behavior.



In [24]:

    
click_group = pd.Series(index = data.index) #create a series to store the new age group in

for index, row in data['Clicks'].iteritems(): #iterate over the rows
    if row < 1:
        click_group[index] = 1
    if 1 <= row <= 2:
        click_group[index] = 2
    if row >2:
        click_group[index] = 3


data['click_group'] = click_group #add new categories to the data frame
data #check that it worked









    Out[24]:






  
    
      
      Age
      Gender
      Impressions
      Clicks
      Signed_In
      age_group
      CTR
      click_group
    
  
  
    
      0 
       36
       0
        3
       0
       1
       4
       0.000000
       1
    
    
      1 
       73
       1
        3
       0
       1
       7
       0.000000
       1
    
    
      2 
       30
       0
        3
       0
       1
       3
       0.000000
       1
    
    
      3 
       49
       1
        3
       0
       1
       5
       0.000000
       1
    
    
      4 
       47
       1
       11
       0
       1
       5
       0.000000
       1
    
    
      5 
       47
       0
       11
       1
       1
       5
       0.090909
       2
    
    
      6 
        0
       0
        7
       1
       0
       1
       0.142857
       2
    
    
      7 
       46
       0
        5
       0
       1
       5
       0.000000
       1
    
    
      8 
       16
       0
        3
       0
       1
       1
       0.000000
       1
    
    
      9 
       52
       0
        4
       0
       1
       5
       0.000000
       1
    
    
      10
        0
       0
        8
       1
       0
       1
       0.125000
       2
    
    
      11
       21
       0
        3
       0
       1
       2
       0.000000
       1
    
    
      12
        0
       0
        4
       0
       0
       1
       0.000000
       1
    
    
      13
       57
       0
        6
       0
       1
       6
       0.000000
       1
    
    
      14
       31
       0
        5
       0
       1
       3
       0.000000
       1
    
    
      15
        0
       0
        6
       0
       0
       1
       0.000000
       1
    
    
      16
       40
       1
        3
       0
       1
       4
       0.000000
       1
    
    
      17
       31
       1
        5
       0
       1
       3
       0.000000
       1
    
    
      18
       38
       0
        4
       0
       1
       4
       0.000000
       1
    
    
      19
        0
       0
        5
       0
       0
       1
       0.000000
       1
    
    
      20
       59
       1
        4
       0
       1
       6
       0.000000
       1
    
    
      21
       61
       0
        6
       0
       1
       6
       0.000000
       1
    
    
      22
       48
       0
        7
       0
       1
       5
       0.000000
       1
    
    
      23
       29
       1
        2
       0
       1
       3
       0.000000
       1
    
    
      24
        0
       0
        4
       0
       0
       1
       0.000000
       1
    
    
      25
       19
       1
        4
       0
       1
       2
       0.000000
       1
    
    
      26
       19
       0
        3
       0
       1
       2
       0.000000
       1
    
    
      27
       48
       1
        9
       0
       1
       5
       0.000000
       1
    
    
      28
       48
       1
        4
       0
       1
       5
       0.000000
       1
    
    
      29
       21
       1
        5
       0
       1
       2
       0.000000
       1
    
    
      30
       23
       0
        4
       0
       1
       2
       0.000000
       1
    
    
      31
       66
       1
        3
       0
       1
       7
       0.000000
       1
    
    
      32
       23
       1
        7
       0
       1
       2
       0.000000
       1
    
    
      33
       44
       1
        4
       0
       1
       4
       0.000000
       1
    
    
      34
       32
       0
        2
       0
       1
       3
       0.000000
       1
    
    
      35
       22
       1
        2
       0
       1
       2
       0.000000
       1
    
    
      36
       30
       1
        8
       0
       1
       3
       0.000000
       1
    
    
      37
       74
       0
        2
       0
       1
       7
       0.000000
       1
    
    
      38
       65
       1
        5
       0
       1
       7
       0.000000
       1
    
    
      39
        0
       0
        7
       2
       0
       1
       0.285714
       2
    
    
      40
       32
       1
        9
       0
       1
       3
       0.000000
       1
    
    
      41
        0
       0
        4
       0
       0
       1
       0.000000
       1
    
    
      42
       61
       0
        2
       0
       1
       6
       0.000000
       1
    
    
      43
       63
       1
        4
       0
       1
       6
       0.000000
       1
    
    
      44
       36
       1
        1
       0
       1
       4
       0.000000
       1
    
    
      45
       35
       0
        7
       0
       1
       4
       0.000000
       1
    
    
      46
        0
       0
        3
       0
       0
       1
       0.000000
       1
    
    
      47
        0
       0
        7
       0
       0
       1
       0.000000
       1
    
    
      48
       73
       0
        4
       0
       1
       7
       0.000000
       1
    
    
      49
        0
       0
        5
       0
       0
       1
       0.000000
       1
    
    
      50
        0
       0
        5
       0
       0
       1
       0.000000
       1
    
    
      51
       33
       0
        4
       0
       1
       3
       0.000000
       1
    
    
      52
       34
       0
        5
       0
       1
       3
       0.000000
       1
    
    
      53
       48
       1
        1
       0
       1
       5
       0.000000
       1
    
    
      54
       43
       1
        4
       0
       1
       4
       0.000000
       1
    
    
      55
       59
       1
        2
       0
       1
       6
       0.000000
       1
    
    
      56
       34
       0
        3
       0
       1
       3
       0.000000
       1
    
    
      57
       22
       0
        5
       0
       1
       2
       0.000000
       1
    
    
      58
       18
       1
        1
       0
       1
       2
       0.000000
       1
    
    
      59
        0
       0
        4
       0
       0
       1
       0.000000
       1
    
    
      
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
  

458441 rows × 8 columns

Explore the data and make visual and quantitative comparisons across user segments/demographics (<18-year-old males versus < 18-year-old females or logged-in versus not, for example).



In [26]:

    
data[data['age_group']==1].boxplot(column = 'Impressions', by ='Gender')
plt.show()

The above example has two sorting steps. the first is in the indexing, where I selected only age group one. i could have also used

data[data['Age']]<18]

to accomplish the same thing.

the grouping is handled by the pd.data.boxplot() call using teh by = parameter.

Create metrics/measurements/statistics that summarize the data. Examples of potential metrics include CTR, quantiles, mean, median, variance, and max, and these can be calculated across the various user segments. Be selective. Think about what will be important to track over time—what will compress the data, but still capture user behavior.



In [23]:

    
#summary stats by age group for Impressions
byage = data.groupby('age_group')
byage['Impressions'].describe()









    Out[23]:





age_group       
1          count    150934.000000
           mean          5.000345
           std           2.243536
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          18.000000
2          count     40694.000000
           mean          5.002826
           std           2.244950
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          17.000000
3          count     58174.000000
           mean          4.993829
           std           2.226877
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          17.000000
4          count     70860.000000
           mean          5.021507
           std           2.237829
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          20.000000
5          count     64288.000000
           mean          5.010406
           std           2.240790
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          17.000000
6          count     44738.000000
           mean          5.022308
           std           2.230206
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          16.000000
7          count     28753.000000
           mean          5.012347
           std           2.249162
           min           0.000000
           25%           3.000000
           50%           5.000000
           75%           6.000000
           max          16.000000
Length: 56, dtype: float64



In [27]:

    
#summary of CTR by gender
bygender = data.groupby('Gender')
bygender['CTR'].describe()









    Out[27]:





Gender       
0       count    2.901760e+05
        mean              inf
        std               NaN
        min      0.000000e+00
        25%      0.000000e+00
        50%      0.000000e+00
        75%      0.000000e+00
        max               inf
1       count    1.682650e+05
        mean              inf
        std               NaN
        min      0.000000e+00
        25%      0.000000e+00
        50%      0.000000e+00
        75%      0.000000e+00
        max               inf
dtype: float64



In [28]:

    
#summary stats of all users that were signed in
data[data['Signed_In'] == 1].describe()









    Out[28]:






  
    
      
      Age
      Gender
      Impressions
      Clicks
      Signed_In
      age_group
      CTR
      click_group
    
  
  
    
      count
       321335.000000
       321335.000000
       321335.000000
       321335.000000
       321335
       321335.000000
       3.213350e+05
       321335.000000
    
    
      mean
           42.062054
            0.523644
            5.010584
            0.071480
            1
            4.183537
                inf
            1.068738
    
    
      std
           16.308117
            0.499441
            2.238784
            0.268659
            0
            1.613538
                NaN
            0.253328
    
    
      min
            7.000000
            0.000000
            0.000000
            0.000000
            1
            1.000000
       0.000000e+00
            1.000000
    
    
      25%
           29.000000
            0.000000
            3.000000
            0.000000
            1
            3.000000
       0.000000e+00
            1.000000
    
    
      50%
           41.000000
            1.000000
            5.000000
            0.000000
            1
            4.000000
       0.000000e+00
            1.000000
    
    
      75%
           53.000000
            1.000000
            6.000000
            0.000000
            1
            5.000000
       0.000000e+00
            1.000000
    
    
      max
          108.000000
            1.000000
           20.000000
            3.000000
            1
            7.000000
                inf
            3.000000
    
  

8 rows × 8 columns

other files from this set contain other days. load and process those to compare days. I'm not doing it right now because i've got the jist of this.

	Age	Gender	Impressions	Clicks	Signed_In
0	36	0	3	0	1
1	73	1	3	0	1
2	30	0	3	0	1
3	49	1	3	0	1
4	47	1	11	0	1
5	47	0	11	1	1
6	0	0	7	1	0
7	46	0	5	0	1
8	16	0	3	0	1
9	52	0	4	0	1
10	0	0	8	1	0
11	21	0	3	0	1
12	0	0	4	0	0
13	57	0	6	0	1
14	31	0	5	0	1
15	0	0	6	0	0
16	40	1	3	0	1
17	31	1	5	0	1
18	38	0	4	0	1
19	0	0	5	0	0
20	59	1	4	0	1
21	61	0	6	0	1
22	48	0	7	0	1
23	29	1	2	0	1
24	0	0	4	0	0
25	19	1	4	0	1
26	19	0	3	0	1
27	48	1	9	0	1
28	48	1	4	0	1
29	21	1	5	0	1
30	23	0	4	0	1
31	66	1	3	0	1
32	23	1	7	0	1
33	44	1	4	0	1
34	32	0	2	0	1
35	22	1	2	0	1
36	30	1	8	0	1
37	74	0	2	0	1
38	65	1	5	0	1
39	0	0	7	2	0
40	32	1	9	0	1
41	0	0	4	0	0
42	61	0	2	0	1
43	63	1	4	0	1
44	36	1	1	0	1
45	35	0	7	0	1
46	0	0	3	0	0
47	0	0	7	0	0
48	73	0	4	0	1
49	0	0	5	0	0
50	0	0	5	0	0
51	33	0	4	0	1
52	34	0	5	0	1
53	48	1	1	0	1
54	43	1	4	0	1
55	59	1	2	0	1
56	34	0	3	0	1
57	22	0	5	0	1
58	18	1	1	0	1
59	0	0	4	0	0
	...	...	...	...	...

	Age	Gender	Impressions	Clicks	Signed_In	age_group
0	36	0	3	0	1	4
1	73	1	3	0	1	7
2	30	0	3	0	1	3
3	49	1	3	0	1	5
4	47	1	11	0	1	5
5	47	0	11	1	1	5
6	0	0	7	1	0	1
7	46	0	5	0	1	5
8	16	0	3	0	1	1
9	52	0	4	0	1	5
10	0	0	8	1	0	1
11	21	0	3	0	1	2
12	0	0	4	0	0	1
13	57	0	6	0	1	6
14	31	0	5	0	1	3
15	0	0	6	0	0	1
16	40	1	3	0	1	4
17	31	1	5	0	1	3
18	38	0	4	0	1	4
19	0	0	5	0	0	1
20	59	1	4	0	1	6
21	61	0	6	0	1	6
22	48	0	7	0	1	5
23	29	1	2	0	1	3
24	0	0	4	0	0	1
25	19	1	4	0	1	2
26	19	0	3	0	1	2
27	48	1	9	0	1	5
28	48	1	4	0	1	5
29	21	1	5	0	1	2
30	23	0	4	0	1	2
31	66	1	3	0	1	7
32	23	1	7	0	1	2
33	44	1	4	0	1	4
34	32	0	2	0	1	3
35	22	1	2	0	1	2
36	30	1	8	0	1	3
37	74	0	2	0	1	7
38	65	1	5	0	1	7
39	0	0	7	2	0	1
40	32	1	9	0	1	3
41	0	0	4	0	0	1
42	61	0	2	0	1	6
43	63	1	4	0	1	6
44	36	1	1	0	1	4
45	35	0	7	0	1	4
46	0	0	3	0	0	1
47	0	0	7	0	0	1
48	73	0	4	0	1	7
49	0	0	5	0	0	1
50	0	0	5	0	0	1
51	33	0	4	0	1	3
52	34	0	5	0	1	3
53	48	1	1	0	1	5
54	43	1	4	0	1	4
55	59	1	2	0	1	6
56	34	0	3	0	1	3
57	22	0	5	0	1	2
58	18	1	1	0	1	2
59	0	0	4	0	0	1
	...	...	...	...	...	...

	Age	Gender	Impressions	Clicks	Signed_In	age_group	CTR	click_group
0	36	0	3	0	1	4	0.000000	1
1	73	1	3	0	1	7	0.000000	1
2	30	0	3	0	1	3	0.000000	1
3	49	1	3	0	1	5	0.000000	1
4	47	1	11	0	1	5	0.000000	1
5	47	0	11	1	1	5	0.090909	2
6	0	0	7	1	0	1	0.142857	2
7	46	0	5	0	1	5	0.000000	1
8	16	0	3	0	1	1	0.000000	1
9	52	0	4	0	1	5	0.000000	1
10	0	0	8	1	0	1	0.125000	2
11	21	0	3	0	1	2	0.000000	1
12	0	0	4	0	0	1	0.000000	1
13	57	0	6	0	1	6	0.000000	1
14	31	0	5	0	1	3	0.000000	1
15	0	0	6	0	0	1	0.000000	1
16	40	1	3	0	1	4	0.000000	1
17	31	1	5	0	1	3	0.000000	1
18	38	0	4	0	1	4	0.000000	1
19	0	0	5	0	0	1	0.000000	1
20	59	1	4	0	1	6	0.000000	1
21	61	0	6	0	1	6	0.000000	1
22	48	0	7	0	1	5	0.000000	1
23	29	1	2	0	1	3	0.000000	1
24	0	0	4	0	0	1	0.000000	1
25	19	1	4	0	1	2	0.000000	1
26	19	0	3	0	1	2	0.000000	1
27	48	1	9	0	1	5	0.000000	1
28	48	1	4	0	1	5	0.000000	1
29	21	1	5	0	1	2	0.000000	1
30	23	0	4	0	1	2	0.000000	1
31	66	1	3	0	1	7	0.000000	1
32	23	1	7	0	1	2	0.000000	1
33	44	1	4	0	1	4	0.000000	1
34	32	0	2	0	1	3	0.000000	1
35	22	1	2	0	1	2	0.000000	1
36	30	1	8	0	1	3	0.000000	1
37	74	0	2	0	1	7	0.000000	1
38	65	1	5	0	1	7	0.000000	1
39	0	0	7	2	0	1	0.285714	2
40	32	1	9	0	1	3	0.000000	1
41	0	0	4	0	0	1	0.000000	1
42	61	0	2	0	1	6	0.000000	1
43	63	1	4	0	1	6	0.000000	1
44	36	1	1	0	1	4	0.000000	1
45	35	0	7	0	1	4	0.000000	1
46	0	0	3	0	0	1	0.000000	1
47	0	0	7	0	0	1	0.000000	1
48	73	0	4	0	1	7	0.000000	1
49	0	0	5	0	0	1	0.000000	1
50	0	0	5	0	0	1	0.000000	1
51	33	0	4	0	1	3	0.000000	1
52	34	0	5	0	1	3	0.000000	1
53	48	1	1	0	1	5	0.000000	1
54	43	1	4	0	1	4	0.000000	1
55	59	1	2	0	1	6	0.000000	1
56	34	0	3	0	1	3	0.000000	1
57	22	0	5	0	1	2	0.000000	1
58	18	1	1	0	1	2	0.000000	1
59	0	0	4	0	0	1	0.000000	1
	...	...	...	...	...	...	...	...

	Age	Gender	Impressions	Clicks	Signed_In	age_group	CTR	click_group
count	321335.000000	321335.000000	321335.000000	321335.000000	321335	321335.000000	3.213350e+05	321335.000000
mean	42.062054	0.523644	5.010584	0.071480	1	4.183537	inf	1.068738
std	16.308117	0.499441	2.238784	0.268659	0	1.613538	NaN	0.253328
min	7.000000	0.000000	0.000000	0.000000	1	1.000000	0.000000e+00	1.000000
25%	29.000000	0.000000	3.000000	0.000000	1	3.000000	0.000000e+00	1.000000
50%	41.000000	1.000000	5.000000	0.000000	1	4.000000	0.000000e+00	1.000000
75%	53.000000	1.000000	6.000000	0.000000	1	5.000000	0.000000e+00	1.000000
max	108.000000	1.000000	20.000000	3.000000	1	7.000000	inf	3.000000

	Age	Gender	Impressions	Clicks	Signed_In
0	36	0	3	0	1
1	73	1	3	0	1
2	30	0	3	0	1
3	49	1	3	0	1
4	47	1	11	0	1
5	47	0	11	1	1
6	0	0	7	1	0
7	46	0	5	0	1
8	16	0	3	0	1
9	52	0	4	0	1
10	0	0	8	1	0
11	21	0	3	0	1
12	0	0	4	0	0
13	57	0	6	0	1
14	31	0	5	0	1
15	0	0	6	0	0
16	40	1	3	0	1
17	31	1	5	0	1
18	38	0	4	0	1
19	0	0	5	0	0
20	59	1	4	0	1
21	61	0	6	0	1
22	48	0	7	0	1
23	29	1	2	0	1
24	0	0	4	0	0
25	19	1	4	0	1
26	19	0	3	0	1
27	48	1	9	0	1
28	48	1	4	0	1
29	21	1	5	0	1
30	23	0	4	0	1
31	66	1	3	0	1
32	23	1	7	0	1
33	44	1	4	0	1
34	32	0	2	0	1
35	22	1	2	0	1
36	30	1	8	0	1
37	74	0	2	0	1
38	65	1	5	0	1
39	0	0	7	2	0
40	32	1	9	0	1
41	0	0	4	0	0
42	61	0	2	0	1
43	63	1	4	0	1
44	36	1	1	0	1
45	35	0	7	0	1
46	0	0	3	0	0
47	0	0	7	0	0
48	73	0	4	0	1
49	0	0	5	0	0
50	0	0	5	0	0
51	33	0	4	0	1
52	34	0	5	0	1
53	48	1	1	0	1
54	43	1	4	0	1
55	59	1	2	0	1
56	34	0	3	0	1
57	22	0	5	0	1
58	18	1	1	0	1
59	0	0	4	0	0
	...	...	...	...	...

	Age	Gender	Impressions	Clicks	Signed_In	age_group
0	36	0	3	0	1	4
1	73	1	3	0	1	7
2	30	0	3	0	1	3
3	49	1	3	0	1	5
4	47	1	11	0	1	5
5	47	0	11	1	1	5
6	0	0	7	1	0	1
7	46	0	5	0	1	5
8	16	0	3	0	1	1
9	52	0	4	0	1	5
10	0	0	8	1	0	1
11	21	0	3	0	1	2
12	0	0	4	0	0	1
13	57	0	6	0	1	6
14	31	0	5	0	1	3
15	0	0	6	0	0	1
16	40	1	3	0	1	4
17	31	1	5	0	1	3
18	38	0	4	0	1	4
19	0	0	5	0	0	1
20	59	1	4	0	1	6
21	61	0	6	0	1	6
22	48	0	7	0	1	5
23	29	1	2	0	1	3
24	0	0	4	0	0	1
25	19	1	4	0	1	2
26	19	0	3	0	1	2
27	48	1	9	0	1	5
28	48	1	4	0	1	5
29	21	1	5	0	1	2
30	23	0	4	0	1	2
31	66	1	3	0	1	7
32	23	1	7	0	1	2
33	44	1	4	0	1	4
34	32	0	2	0	1	3
35	22	1	2	0	1	2
36	30	1	8	0	1	3
37	74	0	2	0	1	7
38	65	1	5	0	1	7
39	0	0	7	2	0	1
40	32	1	9	0	1	3
41	0	0	4	0	0	1
42	61	0	2	0	1	6
43	63	1	4	0	1	6
44	36	1	1	0	1	4
45	35	0	7	0	1	4
46	0	0	3	0	0	1
47	0	0	7	0	0	1
48	73	0	4	0	1	7
49	0	0	5	0	0	1
50	0	0	5	0	0	1
51	33	0	4	0	1	3
52	34	0	5	0	1	3
53	48	1	1	0	1	5
54	43	1	4	0	1	4
55	59	1	2	0	1	6
56	34	0	3	0	1	3
57	22	0	5	0	1	2
58	18	1	1	0	1	2
59	0	0	4	0	0	1
	...	...	...	...	...	...

	Age	Gender	Impressions	Clicks	Signed_In
0	36	0	3	0	1
1	73	1	3	0	1
2	30	0	3	0	1
3	49	1	3	0	1
4	47	1	11	0	1
5	47	0	11	1	1
6	0	0	7	1	0
7	46	0	5	0	1
8	16	0	3	0	1
9	52	0	4	0	1
10	0	0	8	1	0
11	21	0	3	0	1
12	0	0	4	0	0
13	57	0	6	0	1
14	31	0	5	0	1
15	0	0	6	0	0
16	40	1	3	0	1
17	31	1	5	0	1
18	38	0	4	0	1
19	0	0	5	0	0
20	59	1	4	0	1
21	61	0	6	0	1
22	48	0	7	0	1
23	29	1	2	0	1
24	0	0	4	0	0
25	19	1	4	0	1
26	19	0	3	0	1
27	48	1	9	0	1
28	48	1	4	0	1
29	21	1	5	0	1
30	23	0	4	0	1
31	66	1	3	0	1
32	23	1	7	0	1
33	44	1	4	0	1
34	32	0	2	0	1
35	22	1	2	0	1
36	30	1	8	0	1
37	74	0	2	0	1
38	65	1	5	0	1
39	0	0	7	2	0
40	32	1	9	0	1
41	0	0	4	0	0
42	61	0	2	0	1
43	63	1	4	0	1
44	36	1	1	0	1
45	35	0	7	0	1
46	0	0	3	0	0
47	0	0	7	0	0
48	73	0	4	0	1
49	0	0	5	0	0
50	0	0	5	0	0
51	33	0	4	0	1
52	34	0	5	0	1
53	48	1	1	0	1
54	43	1	4	0	1
55	59	1	2	0	1
56	34	0	3	0	1
57	22	0	5	0	1
58	18	1	1	0	1
59	0	0	4	0	0
	...	...	...	...	...

	Age	Gender	Impressions	Clicks	Signed_In	age_group
0	36	0	3	0	1	4
1	73	1	3	0	1	7
2	30	0	3	0	1	3
3	49	1	3	0	1	5
4	47	1	11	0	1	5
5	47	0	11	1	1	5
6	0	0	7	1	0	1
7	46	0	5	0	1	5
8	16	0	3	0	1	1
9	52	0	4	0	1	5
10	0	0	8	1	0	1
11	21	0	3	0	1	2
12	0	0	4	0	0	1
13	57	0	6	0	1	6
14	31	0	5	0	1	3
15	0	0	6	0	0	1
16	40	1	3	0	1	4
17	31	1	5	0	1	3
18	38	0	4	0	1	4
19	0	0	5	0	0	1
20	59	1	4	0	1	6
21	61	0	6	0	1	6
22	48	0	7	0	1	5
23	29	1	2	0	1	3
24	0	0	4	0	0	1
25	19	1	4	0	1	2
26	19	0	3	0	1	2
27	48	1	9	0	1	5
28	48	1	4	0	1	5
29	21	1	5	0	1	2
30	23	0	4	0	1	2
31	66	1	3	0	1	7
32	23	1	7	0	1	2
33	44	1	4	0	1	4
34	32	0	2	0	1	3
35	22	1	2	0	1	2
36	30	1	8	0	1	3
37	74	0	2	0	1	7
38	65	1	5	0	1	7
39	0	0	7	2	0	1
40	32	1	9	0	1	3
41	0	0	4	0	0	1
42	61	0	2	0	1	6
43	63	1	4	0	1	6
44	36	1	1	0	1	4
45	35	0	7	0	1	4
46	0	0	3	0	0	1
47	0	0	7	0	0	1
48	73	0	4	0	1	7
49	0	0	5	0	0	1
50	0	0	5	0	0	1
51	33	0	4	0	1	3
52	34	0	5	0	1	3
53	48	1	1	0	1	5
54	43	1	4	0	1	4
55	59	1	2	0	1	6
56	34	0	3	0	1	3
57	22	0	5	0	1	2
58	18	1	1	0	1	2
59	0	0	4	0	0	1
	...	...	...	...	...	...