In [ ]:
import pandas as pd
# Show the installed pandas version as this cell's output.
pd.__version__

In [ ]:
# Location of the Planet Four classifications CSV that the next cell reads.
# NOTE(review): absolute, user-specific path -- it only resolves on a machine
# with this exact directory layout.
fname = '/Users/maye/data/planet4/2014-02-02_planet_four_classifications.csv'

In [ ]:
# Read at most the first million rows of the CSV; the string 'null' is
# treated as a missing value in addition to pandas' default NA markers.
# `nrows` is documented as an integer, so pass an int instead of the float 1e6.
df = pd.read_csv(fname, na_values=['null'], nrows=1000000)

# Leftover from a chunked-read variant (no `reader` is defined in the visible cells):
# data = [chunk for chunk in reader]
# df = pd.concat(data, ignore_index=True)

In [ ]:
# List the column labels of the loaded frame.
df.columns

In [ ]:
# Number of rows per distinct value of the `marking` column.
df.marking.value_counts()

In [ ]:
# Keep only the rows whose `marking` value is 'blotch'.
blotches = df.loc[df['marking'] == 'blotch']

In [ ]:
# Keep only the rows whose `marking` value is 'fan'.
fans = df.loc[df['marking'] == 'fan']

In [ ]:
# Take the first blotch row (by position) as a sample record.
s = blotches.iloc[0]

In [ ]:
# Display the sample record.
s

In [ ]:
# Distinct values that occur in the `marking` column.
df.marking.unique()

In [ ]:
def no_of_nulls(row):
    """Tell whether a classification row has an acceptable number of null fields.

    Despite its name, this returns a flag, not a count.

    Parameters
    ----------
    row : pandas.Series
        A single row; it must provide a ``marking`` entry.

    Returns
    -------
    bool
        True when ``row.marking`` is 'interesting' or 'none', or when exactly
        two entries of ``row`` are null; False otherwise.
    """
    # These marking values are accepted without looking at the other fields.
    exempt_markings = ('interesting', 'none')
    # Every other row passes only if exactly two of its entries are null.
    return row.marking in exempt_markings or int(row.isnull().sum()) == 2

In [ ]:
# Create the `okay` column with a default of True.
# NOTE(review): the next cell reassigns df['okay'] for every row, so this
# initial value is not kept.
df['okay']=True

In [ ]:
# Flag each row as okay when its marking is 'interesting' or 'none', or when
# exactly two of its fields are null. This is the same rule as no_of_nulls(),
# computed column-wise instead of calling a Python function once per row,
# which is very slow on a frame of up to a million rows.
df['okay'] = (
    df['marking'].isin(['interesting', 'none'])
    | (df.isnull().sum(axis=1) == 2)
)

In [ ]:
# Count how many rows were flagged True versus False.
df.okay.value_counts()

In [ ]:
# Shape of the frame restricted to the rows flagged okay.
df[df.okay].shape

In [ ]:
from P4_sandbox import get_data

In [ ]:
# Pass the `fans` row whose index label is 357425 to get_image_from_record.
# NOTE(review): hard-coded label -- the lookup only succeeds if that row is
# among the rows loaded and is a 'fan' marking.
# Presumably returns image data that imshow can draw (see next cell); verify
# against P4_sandbox.get_data.
im = get_data.get_image_from_record(fans.loc[357425 ])

In [ ]:
# Draw the image obtained in the previous cell.
# NOTE(review): `imshow` is not imported in any visible cell -- presumably it
# comes from a %pylab / matplotlib star-import in the session; confirm, since
# a fresh kernel would raise NameError here otherwise.
imshow(im)

In [ ]:
# Histogram of the `spread` column over the fan rows.
fans.spread.hist()

In [ ]:
# Convert the acquisition_date column to datetimes.
# Bracket assignment is used because attribute-style assignment only updates
# a column that already exists and can otherwise silently create a plain
# instance attribute instead of a column.
df['acquisition_date'] = pd.to_datetime(df['acquisition_date'])

In [ ]:
# Inspect the per-column dtypes (e.g. to check the datetime conversion made
# in the previous cell).
df.dtypes

In [ ]:
# IPython help lookup (trailing `?`) for DataFrame.to_hdf -- interactive
# exploration only; this is not valid plain Python and produces no data.
df.to_hdf?

In [ ]:
# Write the frame to an HDF5 file under the key 'df'.
# NOTE(review): the date in this output filename (2013-10-27) does not match
# the input CSV read above (2014-02-02) -- confirm which date is intended.
# NOTE(review): absolute, user-specific path, as with the input file.
df.to_hdf('/Users/maye/data/planet4/2013-10-27_planet_four_classifications.h5',
          'df')

In [ ]:
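# Unused leftover -- presumably an optional `data_columns` argument for the
# to_hdf call in the previous cell (TODO confirm before re-enabling):
# data_columns=['image_id','image_name','user_name','marking'])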