notebook.community

Edit and run



In [8]:

    
import pandas as pd



In [20]:

    
surveys_df = pd.read_csv('./data/surveys.csv')



In [22]:

    
type(surveys_df)









    Out[22]:





pandas.core.frame.DataFrame



In [27]:

    
surveys_df.dtypes









    Out[27]:





record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object



In [14]:

    
surveys_df.tail()









    Out[14]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      35544
      35545
      12
      31
      2002
      15
      AH
      NaN
      NaN
      NaN
    
    
      35545
      35546
      12
      31
      2002
      15
      AH
      NaN
      NaN
      NaN
    
    
      35546
      35547
      12
      31
      2002
      10
      RM
      F
      15.0
      14.0
    
    
      35547
      35548
      12
      31
      2002
      7
      DO
      M
      36.0
      51.0
    
    
      35548
      35549
      12
      31
      2002
      5
      NaN
      NaN
      NaN
      NaN



In [32]:

    
surveys_df.columns









    Out[32]:





Index(['record_id', 'month', 'day', 'year', 'plot_id', 'species_id', 'sex',
       'hindfoot_length', 'weight'],
      dtype='object')



In [31]:

    
surveys_df.shape









    Out[31]:





(35549, 9)



In [39]:

    
surveys_df['species_id'].unique()









    Out[39]:





array(['NL', 'DM', 'PF', 'PE', 'DS', 'PP', 'SH', 'OT', 'DO', 'OX', 'SS',
       'OL', 'RM', nan, 'SA', 'PM', 'AH', 'DX', 'AB', 'CB', 'CM', 'CQ',
       'RF', 'PC', 'PG', 'PH', 'PU', 'CV', 'UR', 'UP', 'ZL', 'UL', 'CS',
       'SC', 'BA', 'SF', 'RO', 'AS', 'SO', 'PI', 'ST', 'CU', 'SU', 'RX',
       'PB', 'PL', 'PX', 'CT', 'US'], dtype=object)



In [41]:

    
surveys_df['species_id'].describe()









    Out[41]:





count     34786
unique       48
top          DM
freq      10596
Name: species_id, dtype: object



In [48]:

    
sorted_data = surveys_df.groupby('sex')
type(sorted_data)









    Out[48]:





pandas.core.groupby.DataFrameGroupBy



In [51]:

    
sorted_data.describe().stack()









    Out[51]:







  
    
      
      
      day
      hindfoot_length
      month
      plot_id
      record_id
      weight
      year
    
    
      sex
      
      
      
      
      
      
      
      
    
  
  
    
      F
      count
      15690.000000
      14894.000000
      15690.000000
      15690.000000
      15690.000000
      15303.000000
      15690.000000
    
    
      mean
      16.007138
      28.836780
      6.583047
      11.440854
      18036.412046
      42.170555
      1990.644997
    
    
      std
      8.271144
      9.463789
      3.367350
      6.870684
      10423.089000
      36.847958
      7.598725
    
    
      min
      1.000000
      7.000000
      1.000000
      1.000000
      3.000000
      4.000000
      1977.000000
    
    
      25%
      9.000000
      21.000000
      4.000000
      5.000000
      8917.500000
      20.000000
      1984.000000
    
    
      50%
      16.000000
      27.000000
      7.000000
      12.000000
      18075.500000
      34.000000
      1990.000000
    
    
      75%
      23.000000
      36.000000
      10.000000
      17.000000
      27250.000000
      46.000000
      1997.000000
    
    
      max
      31.000000
      64.000000
      12.000000
      24.000000
      35547.000000
      274.000000
      2002.000000
    
    
      M
      count
      17348.000000
      16476.000000
      17348.000000
      17348.000000
      17348.000000
      16879.000000
      17348.000000
    
    
      mean
      16.184286
      29.709578
      6.392668
      11.098282
      17754.835601
      42.995379
      1990.480401
    
    
      std
      8.199274
      9.629246
      3.420806
      6.728713
      10132.203323
      36.184981
      7.403655
    
    
      min
      1.000000
      2.000000
      1.000000
      1.000000
      1.000000
      4.000000
      1977.000000
    
    
      25%
      9.000000
      21.000000
      3.000000
      5.000000
      8969.750000
      20.000000
      1984.000000
    
    
      50%
      16.000000
      34.000000
      6.000000
      11.000000
      17727.500000
      39.000000
      1990.000000
    
    
      75%
      23.000000
      36.000000
      9.000000
      17.000000
      26454.250000
      49.000000
      1997.000000
    
    
      max
      31.000000
      58.000000
      12.000000
      24.000000
      35548.000000
      280.000000
      2002.000000



In [52]:

    
sorted_data.mean()









    Out[52]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      hindfoot_length
      weight
    
    
      sex
      
      
      
      
      
      
      
    
  
  
    
      F
      18036.412046
      6.583047
      16.007138
      1990.644997
      11.440854
      28.836780
      42.170555
    
    
      M
      17754.835601
      6.392668
      16.184286
      1990.480401
      11.098282
      29.709578
      42.995379



In [54]:

    
surveys_df.groupby(['plot_id', 'sex']).describe().stack()









    Out[54]:







  
    
      
      
      
      day
      hindfoot_length
      month
      record_id
      weight
      year
    
    
      plot_id
      sex
      
      
      
      
      
      
      
    
  
  
    
      1
      F
      count
      848.000000
      808.000000
      848.000000
      848.000000
      826.000000
      848.000000
    
    
      mean
      15.338443
      31.733911
      6.597877
      18390.384434
      46.311138
      1990.933962
    
    
      std
      8.325993
      8.894939
      3.366246
      10469.790852
      33.240958
      7.678171
    
    
      min
      1.000000
      14.000000
      1.000000
      9.000000
      5.000000
      1977.000000
    
    
      25%
      9.000000
      22.000000
      4.000000
      8783.500000
      26.000000
      1983.000000
    
    
      50%
      15.000000
      34.000000
      7.000000
      19182.500000
      40.000000
      1991.000000
    
    
      75%
      22.000000
      36.000000
      10.000000
      27691.750000
      50.000000
      1998.000000
    
    
      max
      31.000000
      64.000000
      12.000000
      35393.000000
      196.000000
      2002.000000
    
    
      M
      count
      1095.000000
      1047.000000
      1095.000000
      1095.000000
      1072.000000
      1095.000000
    
    
      mean
      15.905936
      34.302770
      6.121461
      17197.740639
      55.950560
      1990.091324
    
    
      std
      8.053257
      8.979955
      3.418795
      9911.570595
      41.035686
      7.265208
    
    
      min
      1.000000
      12.000000
      1.000000
      6.000000
      4.000000
      1977.000000
    
    
      25%
      9.000000
      32.000000
      3.000000
      8638.000000
      37.000000
      1983.000000
    
    
      50%
      16.000000
      36.000000
      6.000000
      17043.000000
      46.000000
      1990.000000
    
    
      75%
      23.000000
      37.000000
      9.000000
      25251.500000
      54.000000
      1997.000000
    
    
      max
      31.000000
      57.000000
      12.000000
      35390.000000
      231.000000
      2002.000000
    
    
      2
      F
      count
      970.000000
      918.000000
      970.000000
      970.000000
      954.000000
      970.000000
    
    
      mean
      16.288660
      30.161220
      6.426804
      17714.753608
      52.561845
      1990.449485
    
    
      std
      8.046509
      8.677937
      3.537694
      10300.015076
      45.547697
      7.519910
    
    
      min
      1.000000
      14.000000
      1.000000
      3.000000
      5.000000
      1977.000000
    
    
      25%
      10.000000
      21.000000
      3.000000
      9580.250000
      25.000000
      1984.000000
    
    
      50%
      16.000000
      33.000000
      6.000000
      18104.500000
      40.000000
      1990.000000
    
    
      75%
      23.000000
      36.000000
      10.000000
      26586.500000
      51.000000
      1997.000000
    
    
      max
      31.000000
      57.000000
      12.000000
      35405.000000
      274.000000
      2002.000000
    
    
      M
      count
      1144.000000
      1077.000000
      1144.000000
      1144.000000
      1114.000000
      1144.000000
    
    
      mean
      15.440559
      30.353760
      6.340035
      18085.458042
      51.391382
      1990.756119
    
    
      std
      8.414667
      9.016312
      3.623430
      10555.331260
      46.690887
      7.714444
    
    
      min
      1.000000
      13.000000
      1.000000
      1.000000
      5.000000
      1977.000000
    
    
      25%
      9.000000
      21.000000
      3.000000
      8653.000000
      24.000000
      1983.000000
    
    
      50%
      15.000000
      33.000000
      6.000000
      19024.500000
      42.000000
      1991.000000
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      23
      F
      std
      8.776973
      6.455268
      3.353006
      8854.378716
      18.933945
      6.454297
    
    
      min
      1.000000
      14.000000
      1.000000
      41.000000
      8.000000
      1977.000000
    
    
      25%
      9.000000
      16.000000
      4.000000
      9536.000000
      12.000000
      1984.000000
    
    
      50%
      15.000000
      18.000000
      7.000000
      14692.000000
      16.000000
      1988.000000
    
    
      75%
      24.000000
      20.000000
      10.000000
      21169.000000
      23.000000
      1993.000000
    
    
      max
      30.000000
      52.000000
      12.000000
      35489.000000
      199.000000
      2002.000000
    
    
      M
      count
      207.000000
      200.000000
      207.000000
      207.000000
      205.000000
      207.000000
    
    
      mean
      16.077295
      19.850000
      6.391304
      17091.338164
      18.941463
      1989.961353
    
    
      std
      7.955203
      5.980496
      3.543971
      8852.413083
      17.979740
      6.509027
    
    
      min
      1.000000
      14.000000
      1.000000
      55.000000
      4.000000
      1977.000000
    
    
      25%
      10.000000
      16.000000
      3.000000
      11274.000000
      10.000000
      1986.000000
    
    
      50%
      16.000000
      18.000000
      6.000000
      15693.000000
      12.000000
      1989.000000
    
    
      75%
      23.000000
      20.250000
      10.000000
      24740.500000
      22.000000
      1996.000000
    
    
      max
      31.000000
      50.000000
      12.000000
      35282.000000
      131.000000
      2002.000000
    
    
      24
      F
      count
      486.000000
      453.000000
      486.000000
      486.000000
      479.000000
      486.000000
    
    
      mean
      16.393004
      26.993377
      6.596708
      13702.224280
      47.914405
      1987.485597
    
    
      std
      8.367578
      8.561462
      3.327782
      8692.118528
      49.112574
      6.340412
    
    
      min
      1.000000
      12.000000
      1.000000
      1963.000000
      6.000000
      1979.000000
    
    
      25%
      9.000000
      20.000000
      4.000000
      7024.750000
      21.000000
      1982.000000
    
    
      50%
      16.000000
      22.000000
      7.000000
      11560.500000
      33.000000
      1986.000000
    
    
      75%
      24.000000
      35.000000
      10.000000
      19442.000000
      44.000000
      1991.000000
    
    
      max
      31.000000
      52.000000
      12.000000
      35283.000000
      251.000000
      2002.000000
    
    
      M
      count
      485.000000
      446.000000
      485.000000
      485.000000
      479.000000
      485.000000
    
    
      mean
      16.971134
      25.786996
      6.360825
      15208.136082
      39.321503
      1988.641237
    
    
      std
      8.431738
      8.350303
      3.452708
      9395.610252
      42.003947
      6.825992
    
    
      min
      1.000000
      12.000000
      1.000000
      2063.000000
      4.000000
      1979.000000
    
    
      25%
      10.000000
      19.000000
      3.000000
      6992.000000
      17.000000
      1982.000000
    
    
      50%
      17.000000
      21.000000
      6.000000
      12918.000000
      24.000000
      1987.000000
    
    
      75%
      24.000000
      35.000000
      10.000000
      22841.000000
      45.000000
      1995.000000
    
    
      max
      31.000000
      51.000000
      12.000000
      35479.000000
      230.000000
      2002.000000
    
  

384 rows × 6 columns



In [62]:

    
surveys_df.groupby('sex').sum().stack()









    Out[62]:





sex                 
F    record_id          282991305.0
     month                 103288.0
     day                   251152.0
     year                31233220.0
     plot_id               179507.0
     hindfoot_length       429495.0
     weight                645336.0
M    record_id          308010888.0
     month                 110900.0
     day                   280765.0
     year                34530854.0
     plot_id               192533.0
     hindfoot_length       489495.0
     weight                725719.0
dtype: float64



In [68]:

    
surveys_df.groupby('species_id').count()['record_id']['DO']









    Out[68]:





3027



In [74]:

    
surveys_df['weight'] * 100









    Out[74]:





0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
5           NaN
6           NaN
7           NaN
8           NaN
9           NaN
10          NaN
11          NaN
12          NaN
13          NaN
14          NaN
15          NaN
16          NaN
17          NaN
18          NaN
19          NaN
20          NaN
21          NaN
22          NaN
23          NaN
24          NaN
25          NaN
26          NaN
27          NaN
28          NaN
29          NaN
          ...  
35519    3600.0
35520    4800.0
35521    4500.0
35522    4400.0
35523    2700.0
35524    2600.0
35525    2400.0
35526    4300.0
35527       NaN
35528    2500.0
35529       NaN
35530       NaN
35531    4300.0
35532    4800.0
35533    5600.0
35534    5300.0
35535    4200.0
35536    4600.0
35537    3100.0
35538    6800.0
35539    2300.0
35540    3100.0
35541    2900.0
35542    3400.0
35543       NaN
35544       NaN
35545       NaN
35546    1400.0
35547    5100.0
35548       NaN
Name: weight, Length: 35549, dtype: float64



In [81]:

    
surveys_df[['sex', 'species_id']]









    Out[81]:







  
    
      
      sex
      species_id
    
  
  
    
      0
      M
      NL
    
    
      1
      M
      NL
    
    
      2
      F
      DM
    
    
      3
      M
      DM
    
    
      4
      M
      DM
    
    
      5
      M
      PF
    
    
      6
      F
      PE
    
    
      7
      M
      DM
    
    
      8
      F
      DM
    
    
      9
      F
      PF
    
    
      10
      F
      DS
    
    
      11
      M
      DM
    
    
      12
      M
      DM
    
    
      13
      NaN
      DM
    
    
      14
      F
      DM
    
    
      15
      F
      DM
    
    
      16
      F
      DS
    
    
      17
      M
      PP
    
    
      18
      NaN
      PF
    
    
      19
      F
      DS
    
    
      20
      F
      DM
    
    
      21
      F
      NL
    
    
      22
      M
      DM
    
    
      23
      M
      SH
    
    
      24
      M
      DM
    
    
      25
      M
      DM
    
    
      26
      M
      DM
    
    
      27
      M
      DM
    
    
      28
      M
      PP
    
    
      29
      F
      DS
    
    
      ...
      ...
      ...
    
    
      35519
      NaN
      SF
    
    
      35520
      M
      DM
    
    
      35521
      F
      DM
    
    
      35522
      F
      DM
    
    
      35523
      F
      PB
    
    
      35524
      M
      OL
    
    
      35525
      F
      OT
    
    
      35526
      F
      DO
    
    
      35527
      NaN
      US
    
    
      35528
      F
      PB
    
    
      35529
      F
      OT
    
    
      35530
      F
      PB
    
    
      35531
      F
      DM
    
    
      35532
      F
      DM
    
    
      35533
      M
      DM
    
    
      35534
      M
      DM
    
    
      35535
      F
      DM
    
    
      35536
      F
      DM
    
    
      35537
      F
      PB
    
    
      35538
      M
      SF
    
    
      35539
      F
      PB
    
    
      35540
      F
      PB
    
    
      35541
      F
      PB
    
    
      35542
      F
      PB
    
    
      35543
      NaN
      US
    
    
      35544
      NaN
      AH
    
    
      35545
      NaN
      AH
    
    
      35546
      F
      RM
    
    
      35547
      M
      DO
    
    
      35548
      NaN
      NaN
    
  

35549 rows × 2 columns



In [84]:

    
surveys_df[['plot_id', 'sex', 'species_id']]









    Out[84]:







  
    
      
      plot_id
      sex
      species_id
    
  
  
    
      0
      2
      M
      NL
    
    
      1
      3
      M
      NL
    
    
      2
      2
      F
      DM
    
    
      3
      7
      M
      DM
    
    
      4
      3
      M
      DM
    
    
      5
      1
      M
      PF
    
    
      6
      2
      F
      PE
    
    
      7
      1
      M
      DM
    
    
      8
      1
      F
      DM
    
    
      9
      6
      F
      PF
    
    
      10
      5
      F
      DS
    
    
      11
      7
      M
      DM
    
    
      12
      3
      M
      DM
    
    
      13
      8
      NaN
      DM
    
    
      14
      6
      F
      DM
    
    
      15
      4
      F
      DM
    
    
      16
      3
      F
      DS
    
    
      17
      2
      M
      PP
    
    
      18
      4
      NaN
      PF
    
    
      19
      11
      F
      DS
    
    
      20
      14
      F
      DM
    
    
      21
      15
      F
      NL
    
    
      22
      13
      M
      DM
    
    
      23
      13
      M
      SH
    
    
      24
      9
      M
      DM
    
    
      25
      15
      M
      DM
    
    
      26
      15
      M
      DM
    
    
      27
      11
      M
      DM
    
    
      28
      11
      M
      PP
    
    
      29
      10
      F
      DS
    
    
      ...
      ...
      ...
      ...
    
    
      35519
      9
      NaN
      SF
    
    
      35520
      9
      M
      DM
    
    
      35521
      9
      F
      DM
    
    
      35522
      9
      F
      DM
    
    
      35523
      9
      F
      PB
    
    
      35524
      9
      M
      OL
    
    
      35525
      8
      F
      OT
    
    
      35526
      13
      F
      DO
    
    
      35527
      13
      NaN
      US
    
    
      35528
      13
      F
      PB
    
    
      35529
      13
      F
      OT
    
    
      35530
      13
      F
      PB
    
    
      35531
      14
      F
      DM
    
    
      35532
      14
      F
      DM
    
    
      35533
      14
      M
      DM
    
    
      35534
      14
      M
      DM
    
    
      35535
      14
      F
      DM
    
    
      35536
      14
      F
      DM
    
    
      35537
      15
      F
      PB
    
    
      35538
      15
      M
      SF
    
    
      35539
      15
      F
      PB
    
    
      35540
      15
      F
      PB
    
    
      35541
      15
      F
      PB
    
    
      35542
      15
      F
      PB
    
    
      35543
      15
      NaN
      US
    
    
      35544
      15
      NaN
      AH
    
    
      35545
      15
      NaN
      AH
    
    
      35546
      10
      F
      RM
    
    
      35547
      7
      M
      DO
    
    
      35548
      5
      NaN
      NaN
    
  

35549 rows × 3 columns



In [93]:

    
surveys_df[0:41:5]









    Out[93]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      0
      1
      7
      16
      1977
      2
      NL
      M
      32.0
      NaN
    
    
      5
      6
      7
      16
      1977
      1
      PF
      M
      14.0
      NaN
    
    
      10
      11
      7
      16
      1977
      5
      DS
      F
      53.0
      NaN
    
    
      15
      16
      7
      16
      1977
      4
      DM
      F
      36.0
      NaN
    
    
      20
      21
      7
      17
      1977
      14
      DM
      F
      34.0
      NaN
    
    
      25
      26
      7
      17
      1977
      15
      DM
      M
      31.0
      NaN
    
    
      30
      31
      7
      17
      1977
      15
      DM
      F
      37.0
      NaN
    
    
      35
      36
      7
      17
      1977
      16
      OT
      F
      22.0
      NaN
    
    
      40
      41
      7
      18
      1977
      23
      DM
      F
      34.0
      NaN



In [99]:

    
surveys_df.iloc[0:3, 1:4]



In [100]:

    
surveys_df.loc[[0, 10], :]









    Out[100]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      0
      1
      7
      16
      1977
      2
      NL
      M
      32.0
      NaN
    
    
      10
      11
      7
      16
      1977
      5
      DS
      F
      53.0
      NaN



In [102]:

    
surveys_df.loc[100, ['plot_id', 'species_id', 'weight']]









    Out[102]:





plot_id        5
species_id    DM
weight        46
Name: 100, dtype: object



In [108]:

    
surveys_df.loc[100, 'day':'month']









    Out[108]:





Series([], Name: 100, dtype: object)



In [119]:

    
surveys_df[(surveys_df.year == 2002) & (surveys_df.month == 2)]









    Out[119]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      33396
      33397
      2
      9
      2002
      1
      RO
      M
      17.0
      8.0
    
    
      33397
      33398
      2
      9
      2002
      1
      DO
      F
      35.0
      35.0
    
    
      33398
      33399
      2
      9
      2002
      1
      PB
      M
      28.0
      48.0
    
    
      33399
      33400
      2
      9
      2002
      1
      DO
      M
      37.0
      60.0
    
    
      33400
      33401
      2
      9
      2002
      1
      DM
      M
      37.0
      47.0
    
    
      33401
      33402
      2
      9
      2002
      1
      DO
      M
      35.0
      40.0
    
    
      33402
      33403
      2
      9
      2002
      1
      OT
      M
      20.0
      26.0
    
    
      33403
      33404
      2
      9
      2002
      2
      PB
      M
      27.0
      54.0
    
    
      33404
      33405
      2
      9
      2002
      2
      RM
      M
      19.0
      8.0
    
    
      33405
      33406
      2
      9
      2002
      2
      DO
      F
      36.0
      49.0
    
    
      33406
      33407
      2
      9
      2002
      2
      DO
      M
      35.0
      52.0
    
    
      33407
      33408
      2
      9
      2002
      2
      PB
      M
      28.0
      41.0
    
    
      33408
      33409
      2
      9
      2002
      2
      PB
      M
      27.0
      49.0
    
    
      33409
      33410
      2
      9
      2002
      2
      DM
      F
      35.0
      46.0
    
    
      33410
      33411
      2
      9
      2002
      2
      PE
      M
      19.0
      25.0
    
    
      33411
      33412
      2
      9
      2002
      2
      PP
      M
      22.0
      17.0
    
    
      33412
      33413
      2
      9
      2002
      2
      SH
      F
      30.0
      57.0
    
    
      33413
      33414
      2
      9
      2002
      2
      PB
      F
      27.0
      31.0
    
    
      33414
      33415
      2
      9
      2002
      2
      NL
      M
      33.0
      120.0
    
    
      33415
      33416
      2
      9
      2002
      3
      PB
      M
      26.0
      44.0
    
    
      33416
      33417
      2
      9
      2002
      3
      DM
      M
      36.0
      48.0
    
    
      33417
      33418
      2
      9
      2002
      3
      PB
      F
      26.0
      26.0
    
    
      33418
      33419
      2
      9
      2002
      3
      DM
      M
      37.0
      54.0
    
    
      33419
      33420
      2
      9
      2002
      3
      OT
      F
      21.0
      24.0
    
    
      33420
      33421
      2
      9
      2002
      3
      PB
      F
      26.0
      28.0
    
    
      33421
      33422
      2
      9
      2002
      3
      PB
      M
      30.0
      55.0
    
    
      33422
      33423
      2
      9
      2002
      3
      DM
      M
      36.0
      49.0
    
    
      33423
      33424
      2
      9
      2002
      3
      DM
      F
      36.0
      47.0
    
    
      33424
      33425
      2
      9
      2002
      3
      DO
      F
      34.0
      51.0
    
    
      33425
      33426
      2
      9
      2002
      3
      DM
      M
      37.0
      52.0
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      33540
      33541
      2
      10
      2002
      13
      PB
      M
      29.0
      49.0
    
    
      33541
      33542
      2
      10
      2002
      13
      PP
      F
      21.0
      15.0
    
    
      33542
      33543
      2
      10
      2002
      14
      DM
      M
      36.0
      48.0
    
    
      33543
      33544
      2
      10
      2002
      14
      AH
      NaN
      NaN
      NaN
    
    
      33544
      33545
      2
      10
      2002
      14
      DM
      F
      35.0
      39.0
    
    
      33545
      33546
      2
      10
      2002
      14
      OT
      F
      21.0
      21.0
    
    
      33546
      33547
      2
      10
      2002
      14
      DM
      F
      36.0
      44.0
    
    
      33547
      33548
      2
      10
      2002
      14
      DM
      F
      36.0
      46.0
    
    
      33548
      33549
      2
      10
      2002
      14
      NL
      M
      33.0
      222.0
    
    
      33549
      33550
      2
      10
      2002
      15
      PB
      F
      25.0
      31.0
    
    
      33550
      33551
      2
      10
      2002
      15
      RM
      F
      17.0
      7.0
    
    
      33551
      33552
      2
      10
      2002
      15
      AH
      NaN
      NaN
      NaN
    
    
      33552
      33553
      2
      10
      2002
      15
      AH
      NaN
      NaN
      NaN
    
    
      33553
      33554
      2
      10
      2002
      15
      RM
      F
      17.0
      10.0
    
    
      33554
      33555
      2
      10
      2002
      15
      PB
      M
      27.0
      45.0
    
    
      33555
      33556
      2
      10
      2002
      5
      RO
      M
      15.0
      9.0
    
    
      33556
      33557
      2
      10
      2002
      5
      RM
      F
      17.0
      9.0
    
    
      33557
      33558
      2
      10
      2002
      16
      PB
      F
      26.0
      25.0
    
    
      33558
      33559
      2
      10
      2002
      16
      DM
      M
      36.0
      38.0
    
    
      33559
      33560
      2
      10
      2002
      16
      DO
      F
      36.0
      51.0
    
    
      33560
      33561
      2
      10
      2002
      10
      RM
      F
      17.0
      8.0
    
    
      33561
      33562
      2
      10
      2002
      10
      DO
      F
      35.0
      50.0
    
    
      33562
      33563
      2
      10
      2002
      10
      DO
      M
      34.0
      52.0
    
    
      33563
      33564
      2
      10
      2002
      10
      DO
      M
      38.0
      51.0
    
    
      33564
      33565
      2
      10
      2002
      10
      RO
      F
      16.0
      8.0
    
    
      33565
      33566
      2
      10
      2002
      7
      DM
      M
      35.0
      42.0
    
    
      33566
      33567
      2
      10
      2002
      7
      DO
      M
      36.0
      62.0
    
    
      33567
      33568
      2
      10
      2002
      7
      DO
      F
      37.0
      55.0
    
    
      33568
      33569
      2
      10
      2002
      7
      DO
      F
      38.0
      47.0
    
    
      33569
      33570
      2
      10
      2002
      7
      DO
      F
      35.0
      54.0
    
  

174 rows × 9 columns



In [127]:

    
surveys_df[(surveys_df.year >= 2000) & (surveys_df.year <= 2004)].head()









    Out[127]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      30158
      30159
      1
      8
      2000
      1
      PP
      F
      22.0
      17.0
    
    
      30159
      30160
      1
      8
      2000
      1
      DO
      M
      35.0
      53.0
    
    
      30160
      30161
      1
      8
      2000
      1
      PP
      F
      21.0
      17.0
    
    
      30161
      30162
      1
      8
      2000
      1
      DM
      M
      36.0
      50.0
    
    
      30162
      30163
      1
      8
      2000
      1
      PP
      M
      20.0
      16.0



In [132]:

    
surveys_df[surveys_df['species_id'].isin(['NL', 'DM', 'PF', 'PE'])].head()









    Out[132]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      0
      1
      7
      16
      1977
      2
      NL
      M
      32.0
      NaN
    
    
      1
      2
      7
      16
      1977
      3
      NL
      M
      33.0
      NaN
    
    
      2
      3
      7
      16
      1977
      2
      DM
      F
      37.0
      NaN
    
    
      3
      4
      7
      16
      1977
      7
      DM
      M
      36.0
      NaN
    
    
      4
      5
      7
      16
      1977
      3
      DM
      M
      35.0
      NaN



In [137]:

    
surveys_df[~pd.isnull(surveys_df).any(axis=1)].head()









    Out[137]:







  
    
      
      record_id
      month
      day
      year
      plot_id
      species_id
      sex
      hindfoot_length
      weight
    
  
  
    
      62
      63
      8
      19
      1977
      3
      DM
      M
      35.0
      40.0
    
    
      63
      64
      8
      19
      1977
      7
      DM
      M
      37.0
      48.0
    
    
      64
      65
      8
      19
      1977
      4
      DM
      F
      34.0
      29.0
    
    
      65
      66
      8
      19
      1977
      4
      DM
      F
      35.0
      46.0
    
    
      66
      67
      8
      19
      1977
      7
      DM
      M
      35.0
      36.0



In [166]:

    
idx = pd.IndexSlice

def clean_date(df):
    return pd.to_datetime(df[['year', 'month', 'day']],
                          errors='cerror')



In [167]:

    
weights = (surveys_df.
              assign(date = clean_date).
              set_index(['date', 'species_id', 'sex']))['weight']









    



---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-167-d33e73fe7373> in <module>()
      1 weights = (surveys_df.
----> 2               assign(date = clean_date).
      3               set_index(['date', 'species_id', 'sex']))['weight']

/usr/local/lib/python3.6/site-packages/pandas/core/frame.py in assign(self, **kwargs)
   2495         results = {}
   2496         for k, v in kwargs.items():
-> 2497             results[k] = com._apply_if_callable(v, data)
   2498 
   2499         # ... and then assign

/usr/local/lib/python3.6/site-packages/pandas/core/common.py in _apply_if_callable(maybe_callable, obj, **kwargs)
    439     """
    440     if callable(maybe_callable):
--> 441         return maybe_callable(obj, **kwargs)
    442     return maybe_callable
    443 

<ipython-input-166-5817482470a1> in clean_date(df)
      3 def clean_date(df):
      4     return pd.to_datetime(df[['year', 'month', 'day']],
----> 5                           errors='cerror')

/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
    510         result = Series(values, index=arg.index, name=arg.name)
    511     elif isinstance(arg, (ABCDataFrame, MutableMapping)):
--> 512         result = _assemble_from_unit_mappings(arg, errors=errors)
    513     elif isinstance(arg, ABCIndexClass):
    514         result = _convert_listlike(arg, box, format, name=arg.name)

/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in _assemble_from_unit_mappings(arg, errors)
    611               coerce(arg[unit_rev['day']]))
    612     try:
--> 613         values = to_datetime(values, format='%Y%m%d', errors=errors)
    614     except (TypeError, ValueError) as e:
    615         raise ValueError("cannot assemble the "

/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin)
    507     elif isinstance(arg, ABCSeries):
    508         from pandas import Series
--> 509         values = _convert_listlike(arg._values, False, format)
    510         result = Series(values, index=arg.index, name=arg.name)
    511     elif isinstance(arg, (ABCDataFrame, MutableMapping)):

/usr/local/lib/python3.6/site-packages/pandas/core/tools/datetimes.py in _convert_listlike(arg, box, format, name, tz)
    412                     try:
    413                         result = tslib.array_strptime(arg, format, exact=exact,
--> 414                                                       errors=errors)
    415                     except tslib.OutOfBoundsDatetime:
    416                         if errors == 'raise':

pandas/_libs/tslib.pyx in pandas._libs.tslib.array_strptime (pandas/_libs/tslib.c:62494)()

AssertionError:



In [158]:

    
surveys_df[['hindfoot_length', 'weight']].apply(lambda x: x.mean())









    Out[158]:





hindfoot_length    29.287932
weight             42.672428
dtype: float64



In [162]:

    
def standardize(x):
    return (x - x.mean() / x.std())

surveys_df[['hindfoot_length', 'weight']].apply(standardize).tail()









    Out[162]:







  
    
      
      hindfoot_length
      weight
    
  
  
    
      35544
      NaN
      NaN
    
    
      35545
      NaN
      NaN
    
    
      35546
      11.937933
      12.835082
    
    
      35547
      32.937933
      49.835082
    
    
      35548
      NaN
      NaN



In [165]:

    
(surveys_df[['hindfoot_length', 'weight']].
     apply(standardize).
     applymap(lambda x: '{:03.2f}'.format(x)).tail())









    Out[165]:







  
    
      
      hindfoot_length
      weight
    
  
  
    
      35544
      nan
      nan
    
    
      35545
      nan
      nan
    
    
      35546
      11.94
      12.84
    
    
      35547
      32.94
      49.84
    
    
      35548
      nan
      nan



In [ ]:

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
35544	35545	12	31	2002	15	AH	NaN	NaN	NaN
35545	35546	12	31	2002	15	AH	NaN	NaN	NaN
35546	35547	12	31	2002	10	RM	F	15.0	14.0
35547	35548	12	31	2002	7	DO	M	36.0	51.0
35548	35549	12	31	2002	5	NaN	NaN	NaN	NaN

		day	hindfoot_length	month	plot_id	record_id	weight	year
sex
F	count	15690.000000	14894.000000	15690.000000	15690.000000	15690.000000	15303.000000	15690.000000
	mean	16.007138	28.836780	6.583047	11.440854	18036.412046	42.170555	1990.644997
	std	8.271144	9.463789	3.367350	6.870684	10423.089000	36.847958	7.598725
	min	1.000000	7.000000	1.000000	1.000000	3.000000	4.000000	1977.000000
	25%	9.000000	21.000000	4.000000	5.000000	8917.500000	20.000000	1984.000000
	50%	16.000000	27.000000	7.000000	12.000000	18075.500000	34.000000	1990.000000
	75%	23.000000	36.000000	10.000000	17.000000	27250.000000	46.000000	1997.000000
	max	31.000000	64.000000	12.000000	24.000000	35547.000000	274.000000	2002.000000
M	count	17348.000000	16476.000000	17348.000000	17348.000000	17348.000000	16879.000000	17348.000000
	mean	16.184286	29.709578	6.392668	11.098282	17754.835601	42.995379	1990.480401
	std	8.199274	9.629246	3.420806	6.728713	10132.203323	36.184981	7.403655
	min	1.000000	2.000000	1.000000	1.000000	1.000000	4.000000	1977.000000
	25%	9.000000	21.000000	3.000000	5.000000	8969.750000	20.000000	1984.000000
	50%	16.000000	34.000000	6.000000	11.000000	17727.500000	39.000000	1990.000000
	75%	23.000000	36.000000	9.000000	17.000000	26454.250000	49.000000	1997.000000
	max	31.000000	58.000000	12.000000	24.000000	35548.000000	280.000000	2002.000000

			day	hindfoot_length	month	record_id	weight	year
plot_id	sex
1	F	count	848.000000	808.000000	848.000000	848.000000	826.000000	848.000000
		mean	15.338443	31.733911	6.597877	18390.384434	46.311138	1990.933962
		std	8.325993	8.894939	3.366246	10469.790852	33.240958	7.678171
		min	1.000000	14.000000	1.000000	9.000000	5.000000	1977.000000
		25%	9.000000	22.000000	4.000000	8783.500000	26.000000	1983.000000
		50%	15.000000	34.000000	7.000000	19182.500000	40.000000	1991.000000
		75%	22.000000	36.000000	10.000000	27691.750000	50.000000	1998.000000
		max	31.000000	64.000000	12.000000	35393.000000	196.000000	2002.000000
	M	count	1095.000000	1047.000000	1095.000000	1095.000000	1072.000000	1095.000000
		mean	15.905936	34.302770	6.121461	17197.740639	55.950560	1990.091324
		std	8.053257	8.979955	3.418795	9911.570595	41.035686	7.265208
		min	1.000000	12.000000	1.000000	6.000000	4.000000	1977.000000
		25%	9.000000	32.000000	3.000000	8638.000000	37.000000	1983.000000
		50%	16.000000	36.000000	6.000000	17043.000000	46.000000	1990.000000
		75%	23.000000	37.000000	9.000000	25251.500000	54.000000	1997.000000
		max	31.000000	57.000000	12.000000	35390.000000	231.000000	2002.000000
2	F	count	970.000000	918.000000	970.000000	970.000000	954.000000	970.000000
		mean	16.288660	30.161220	6.426804	17714.753608	52.561845	1990.449485
		std	8.046509	8.677937	3.537694	10300.015076	45.547697	7.519910
		min	1.000000	14.000000	1.000000	3.000000	5.000000	1977.000000
		25%	10.000000	21.000000	3.000000	9580.250000	25.000000	1984.000000
		50%	16.000000	33.000000	6.000000	18104.500000	40.000000	1990.000000
		75%	23.000000	36.000000	10.000000	26586.500000	51.000000	1997.000000
		max	31.000000	57.000000	12.000000	35405.000000	274.000000	2002.000000
	M	count	1144.000000	1077.000000	1144.000000	1144.000000	1114.000000	1144.000000
		mean	15.440559	30.353760	6.340035	18085.458042	51.391382	1990.756119
		std	8.414667	9.016312	3.623430	10555.331260	46.690887	7.714444
		min	1.000000	13.000000	1.000000	1.000000	5.000000	1977.000000
		25%	9.000000	21.000000	3.000000	8653.000000	24.000000	1983.000000
		50%	15.000000	33.000000	6.000000	19024.500000	42.000000	1991.000000
...	...	...	...	...	...	...	...	...
23	F	std	8.776973	6.455268	3.353006	8854.378716	18.933945	6.454297
		min	1.000000	14.000000	1.000000	41.000000	8.000000	1977.000000
		25%	9.000000	16.000000	4.000000	9536.000000	12.000000	1984.000000
		50%	15.000000	18.000000	7.000000	14692.000000	16.000000	1988.000000
		75%	24.000000	20.000000	10.000000	21169.000000	23.000000	1993.000000
		max	30.000000	52.000000	12.000000	35489.000000	199.000000	2002.000000
	M	count	207.000000	200.000000	207.000000	207.000000	205.000000	207.000000
		mean	16.077295	19.850000	6.391304	17091.338164	18.941463	1989.961353
		std	7.955203	5.980496	3.543971	8852.413083	17.979740	6.509027
		min	1.000000	14.000000	1.000000	55.000000	4.000000	1977.000000
		25%	10.000000	16.000000	3.000000	11274.000000	10.000000	1986.000000
		50%	16.000000	18.000000	6.000000	15693.000000	12.000000	1989.000000
		75%	23.000000	20.250000	10.000000	24740.500000	22.000000	1996.000000
		max	31.000000	50.000000	12.000000	35282.000000	131.000000	2002.000000
24	F	count	486.000000	453.000000	486.000000	486.000000	479.000000	486.000000
		mean	16.393004	26.993377	6.596708	13702.224280	47.914405	1987.485597
		std	8.367578	8.561462	3.327782	8692.118528	49.112574	6.340412
		min	1.000000	12.000000	1.000000	1963.000000	6.000000	1979.000000
		25%	9.000000	20.000000	4.000000	7024.750000	21.000000	1982.000000
		50%	16.000000	22.000000	7.000000	11560.500000	33.000000	1986.000000
		75%	24.000000	35.000000	10.000000	19442.000000	44.000000	1991.000000
		max	31.000000	52.000000	12.000000	35283.000000	251.000000	2002.000000
	M	count	485.000000	446.000000	485.000000	485.000000	479.000000	485.000000
		mean	16.971134	25.786996	6.360825	15208.136082	39.321503	1988.641237
		std	8.431738	8.350303	3.452708	9395.610252	42.003947	6.825992
		min	1.000000	12.000000	1.000000	2063.000000	4.000000	1979.000000
		25%	10.000000	19.000000	3.000000	6992.000000	17.000000	1982.000000
		50%	17.000000	21.000000	6.000000	12918.000000	24.000000	1987.000000
		75%	24.000000	35.000000	10.000000	22841.000000	45.000000	1995.000000
		max	31.000000	51.000000	12.000000	35479.000000	230.000000	2002.000000

	sex	species_id
0	M	NL
1	M	NL
2	F	DM
3	M	DM
4	M	DM
5	M	PF
6	F	PE
7	M	DM
8	F	DM
9	F	PF
10	F	DS
11	M	DM
12	M	DM
13	NaN	DM
14	F	DM
15	F	DM
16	F	DS
17	M	PP
18	NaN	PF
19	F	DS
20	F	DM
21	F	NL
22	M	DM
23	M	SH
24	M	DM
25	M	DM
26	M	DM
27	M	DM
28	M	PP
29	F	DS
...	...	...
35519	NaN	SF
35520	M	DM
35521	F	DM
35522	F	DM
35523	F	PB
35524	M	OL
35525	F	OT
35526	F	DO
35527	NaN	US
35528	F	PB
35529	F	OT
35530	F	PB
35531	F	DM
35532	F	DM
35533	M	DM
35534	M	DM
35535	F	DM
35536	F	DM
35537	F	PB
35538	M	SF
35539	F	PB
35540	F	PB
35541	F	PB
35542	F	PB
35543	NaN	US
35544	NaN	AH
35545	NaN	AH
35546	F	RM
35547	M	DO
35548	NaN	NaN

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
0	1	7	16	1977	2	NL	M	32.0	NaN
5	6	7	16	1977	1	PF	M	14.0	NaN
10	11	7	16	1977	5	DS	F	53.0	NaN
15	16	7	16	1977	4	DM	F	36.0	NaN
20	21	7	17	1977	14	DM	F	34.0	NaN
25	26	7	17	1977	15	DM	M	31.0	NaN
30	31	7	17	1977	15	DM	F	37.0	NaN
35	36	7	17	1977	16	OT	F	22.0	NaN
40	41	7	18	1977	23	DM	F	34.0	NaN

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
33396	33397	2	9	2002	1	RO	M	17.0	8.0
33397	33398	2	9	2002	1	DO	F	35.0	35.0
33398	33399	2	9	2002	1	PB	M	28.0	48.0
33399	33400	2	9	2002	1	DO	M	37.0	60.0
33400	33401	2	9	2002	1	DM	M	37.0	47.0
33401	33402	2	9	2002	1	DO	M	35.0	40.0
33402	33403	2	9	2002	1	OT	M	20.0	26.0
33403	33404	2	9	2002	2	PB	M	27.0	54.0
33404	33405	2	9	2002	2	RM	M	19.0	8.0
33405	33406	2	9	2002	2	DO	F	36.0	49.0
33406	33407	2	9	2002	2	DO	M	35.0	52.0
33407	33408	2	9	2002	2	PB	M	28.0	41.0
33408	33409	2	9	2002	2	PB	M	27.0	49.0
33409	33410	2	9	2002	2	DM	F	35.0	46.0
33410	33411	2	9	2002	2	PE	M	19.0	25.0
33411	33412	2	9	2002	2	PP	M	22.0	17.0
33412	33413	2	9	2002	2	SH	F	30.0	57.0
33413	33414	2	9	2002	2	PB	F	27.0	31.0
33414	33415	2	9	2002	2	NL	M	33.0	120.0
33415	33416	2	9	2002	3	PB	M	26.0	44.0
33416	33417	2	9	2002	3	DM	M	36.0	48.0
33417	33418	2	9	2002	3	PB	F	26.0	26.0
33418	33419	2	9	2002	3	DM	M	37.0	54.0
33419	33420	2	9	2002	3	OT	F	21.0	24.0
33420	33421	2	9	2002	3	PB	F	26.0	28.0
33421	33422	2	9	2002	3	PB	M	30.0	55.0
33422	33423	2	9	2002	3	DM	M	36.0	49.0
33423	33424	2	9	2002	3	DM	F	36.0	47.0
33424	33425	2	9	2002	3	DO	F	34.0	51.0
33425	33426	2	9	2002	3	DM	M	37.0	52.0
...	...	...	...	...	...	...	...	...	...
33540	33541	2	10	2002	13	PB	M	29.0	49.0
33541	33542	2	10	2002	13	PP	F	21.0	15.0
33542	33543	2	10	2002	14	DM	M	36.0	48.0
33543	33544	2	10	2002	14	AH	NaN	NaN	NaN
33544	33545	2	10	2002	14	DM	F	35.0	39.0
33545	33546	2	10	2002	14	OT	F	21.0	21.0
33546	33547	2	10	2002	14	DM	F	36.0	44.0
33547	33548	2	10	2002	14	DM	F	36.0	46.0
33548	33549	2	10	2002	14	NL	M	33.0	222.0
33549	33550	2	10	2002	15	PB	F	25.0	31.0
33550	33551	2	10	2002	15	RM	F	17.0	7.0
33551	33552	2	10	2002	15	AH	NaN	NaN	NaN
33552	33553	2	10	2002	15	AH	NaN	NaN	NaN
33553	33554	2	10	2002	15	RM	F	17.0	10.0
33554	33555	2	10	2002	15	PB	M	27.0	45.0
33555	33556	2	10	2002	5	RO	M	15.0	9.0
33556	33557	2	10	2002	5	RM	F	17.0	9.0
33557	33558	2	10	2002	16	PB	F	26.0	25.0
33558	33559	2	10	2002	16	DM	M	36.0	38.0
33559	33560	2	10	2002	16	DO	F	36.0	51.0
33560	33561	2	10	2002	10	RM	F	17.0	8.0
33561	33562	2	10	2002	10	DO	F	35.0	50.0
33562	33563	2	10	2002	10	DO	M	34.0	52.0
33563	33564	2	10	2002	10	DO	M	38.0	51.0
33564	33565	2	10	2002	10	RO	F	16.0	8.0
33565	33566	2	10	2002	7	DM	M	35.0	42.0
33566	33567	2	10	2002	7	DO	M	36.0	62.0
33567	33568	2	10	2002	7	DO	F	37.0	55.0
33568	33569	2	10	2002	7	DO	F	38.0	47.0
33569	33570	2	10	2002	7	DO	F	35.0	54.0

	record_id	month	day	year	plot_id	species_id	sex	hindfoot_length	weight
30158	30159	1	8	2000	1	PP	F	22.0	17.0
30159	30160	1	8	2000	1	DO	M	35.0	53.0
30160	30161	1	8	2000	1	PP	F	21.0	17.0
30161	30162	1	8	2000	1	DM	M	36.0	50.0
30162	30163	1	8	2000	1	PP	M	20.0	16.0