In [1]:
import pandas as pd
import numpy as np
In [13]:
# Load the raw tab-separated hit records. Column labels are supplied through
# `names`, so the first line of the file is read as data rather than a header.
hits = pd.read_csv(
    './../data/hits.tsv',
    sep='\t',
    names=[
        'game', 'inning', 'pitcher', 'hitter',
        'class', 'description', 'x', 'y',
    ],
)
hits.head()
Out[13]:
In [14]:
# Inspect the (rows, columns) dimensions of the loaded frame.
hits.shape
Out[14]:
In [15]:
# Positional row numbers of every record that has at least one missing field.
# `Series.nonzero()` was removed in pandas 1.0 and `any` no longer accepts the
# axis positionally in pandas 2.x, so name the axis explicitly and take the
# non-zero positions from the underlying NumPy array. The result is still a
# 1-tuple holding an integer position array, exactly as before.
nans = pd.isnull(hits).any(axis=1).values.nonzero()
In [16]:
# Show which row positions were flagged as containing missing values.
nans
Out[16]:
In [17]:
# Discard every row that has a missing value in any column. `dropna()` locates
# the incomplete rows itself, so re-running this cell can never apply stale
# positions from `nans` to an already-filtered frame.
hits = hits.dropna()
hits.shape
Out[17]:
In [18]:
# Collapse fully identical records, retaining the first occurrence of each.
hits = hits.drop_duplicates(keep='first')
hits.shape
Out[18]:
In [20]:
# d3 keys each hit on a unique id, so expose the current row labels as an
# `id` column.
hits = hits.assign(id=hits.index)
hits.head()
Out[20]:
In [21]:
# Persist the table as tab-separated text, leaving out the row index.
hits.to_csv(
    "../data/newhits.tsv",
    sep="\t",
    index=False,
)
In [ ]: