notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np



In [13]:

    
# Load the raw hit log. The file is tab-separated and the column names are
# supplied here rather than read from the file.
hits = pd.read_csv(
    './../data/hits.tsv',
    sep='\t',
    names=[
        'game', 'inning', 'pitcher', 'hitter',
        'class', 'description', 'x', 'y',
    ],
)
hits.head()









    Out[13]:






  
    
      
      game
      inning
      pitcher
      hitter
      class
      description
      x
      y
    
  
  
    
      0
       1
          Top
       Ubaldo Jimenez
          Ian Kinsler
       O
       Groundout
       117.47
       147.59
    
    
      1
       1
          Top
       Ubaldo Jimenez
         Torii Hunter
       O
       Groundout
       107.43
       153.61
    
    
      2
       1
          Top
       Ubaldo Jimenez
       Miguel Cabrera
       O
       Groundout
       100.40
       164.66
    
    
      3
       1
       Bottom
           Drew Smyly
        Manny Machado
       O
          Flyout
        83.33
        94.38
    
    
      4
       1
       Bottom
           Drew Smyly
           Adam Jones
       H
        Home Run
        26.10
        99.40
    
  

5 rows × 8 columns



In [14]:

    
# (rows, columns) of the table as loaded, before any cleaning.
hits.shape









    Out[14]:





(31527, 8)



In [15]:

    
# Row positions (0-based) of every row that has a missing value in any column.
# Series.nonzero() was deprecated in pandas 0.24 and removed in 1.0, and
# pandas 2.x only accepts `axis` as a keyword, so the boolean mask is converted
# to a NumPy array before calling nonzero(). The result is still a one-element
# tuple `(array,)`, exactly as the old Series.nonzero() returned.
nans = pd.isnull(hits).any(axis=1).to_numpy().nonzero()



In [16]:

    
# Display the row positions stored in `nans`; an empty array means no row
# contains a missing value.
nans









    Out[16]:





(array([], dtype=int64),)



In [17]:

    
# Translate the row positions held in `nans` into index labels and drop those
# rows; the shape is shown to confirm how many rows remain.
hits = hits.drop(labels=hits.index[nans], axis=0)
hits.shape









    Out[17]:





(31527, 8)



In [18]:

    
# Remove rows that are exact duplicates across all columns (the first
# occurrence of each is kept). Index labels are not renumbered, so the index
# may have gaps afterwards.
hits = hits.drop_duplicates()
hits.shape









    Out[18]:





(31464, 8)



In [20]:

    
# d3 keys each hit on a unique id, so expose the DataFrame's row label as an
# `id` column (appended as the last column).
hits = hits.assign(id=hits.index)
hits.head()









    Out[20]:






  
    
      
      game
      inning
      pitcher
      hitter
      class
      description
      x
      y
      id
    
  
  
    
      0
       1
          Top
       Ubaldo Jimenez
          Ian Kinsler
       O
       Groundout
       117.47
       147.59
       0
    
    
      1
       1
          Top
       Ubaldo Jimenez
         Torii Hunter
       O
       Groundout
       107.43
       153.61
       1
    
    
      2
       1
          Top
       Ubaldo Jimenez
       Miguel Cabrera
       O
       Groundout
       100.40
       164.66
       2
    
    
      3
       1
       Bottom
           Drew Smyly
        Manny Machado
       O
          Flyout
        83.33
        94.38
       3
    
    
      4
       1
       Bottom
           Drew Smyly
           Adam Jones
       H
        Home Run
        26.10
        99.40
       4
    
  

5 rows × 9 columns



In [21]:

    
# Write the cleaned table as a tab-separated file; the DataFrame index is
# omitted from the output.
hits.to_csv(
    "../data/newhits.tsv",
    sep="\t",
    index=False,
)



In [ ]:

	game	inning	pitcher	hitter	class	description	x	y
0	1	Top	Ubaldo Jimenez	Ian Kinsler	O	Groundout	117.47	147.59
1	1	Top	Ubaldo Jimenez	Torii Hunter	O	Groundout	107.43	153.61
2	1	Top	Ubaldo Jimenez	Miguel Cabrera	O	Groundout	100.40	164.66
3	1	Bottom	Drew Smyly	Manny Machado	O	Flyout	83.33	94.38
4	1	Bottom	Drew Smyly	Adam Jones	H	Home Run	26.10	99.40