In [ ]:
import pandas as pd
# Display the installed pandas version as the cell output.
pd.__version__
In [ ]:
# Absolute, machine-specific path of the classifications CSV passed to
# pd.read_csv in the next cell.
fname = '/Users/maye/data/planet4/2014-02-02_planet_four_classifications.csv'
In [ ]:
# Load only the first million rows of the CSV; the literal string 'null' is
# parsed as a missing value. nrows is given as an integer because that is the
# documented type (a float such as 1e6 relies on lenient coercion).
# To load the complete file instead, read it in chunks
# (pd.read_csv(..., chunksize=...)) and combine the pieces with
# pd.concat(chunks, ignore_index=True).
df = pd.read_csv(fname, na_values=['null'], nrows=10 ** 6)
In [ ]:
# Display the column labels of the loaded DataFrame.
df.columns
In [ ]:
# Count how many rows carry each distinct value of the 'marking' column.
df['marking'].value_counts()
In [ ]:
# Keep only the rows whose marking is 'blotch'.
blotches = df.loc[df['marking'] == 'blotch']
In [ ]:
# Keep only the rows whose marking is 'fan'.
fans = df.loc[df['marking'] == 'fan']
In [ ]:
# Take the first blotch row (by position) as a sample record.
s = blotches.iloc[0]
In [ ]:
# Display the sample record.
s
In [ ]:
# List the distinct values that occur in the 'marking' column.
df['marking'].unique()
In [ ]:
def no_of_nulls(row, expected_nulls=2):
    """Return True when *row* has an acceptable number of null fields.

    Rows whose ``marking`` is ``'interesting'`` or ``'none'`` are accepted
    unconditionally. Any other row is accepted only if exactly
    *expected_nulls* of its entries are null.

    Parameters
    ----------
    row : pandas.Series
        One record; it must expose a ``marking`` entry.
    expected_nulls : int, optional
        Number of null entries a marked row must have. Defaults to 2, the
        value that was previously hard-coded.

    Returns
    -------
    bool
        True if the row passes the check, False otherwise.
    """
    if row.marking in ('interesting', 'none'):
        return True
    # isnull().sum() counts the nulls directly instead of building a filtered
    # sub-series only to read its length; bool() keeps a plain Python bool.
    return bool(row.isnull().sum() == expected_nulls)
In [ ]:
# Create the 'okay' column, set to True for every row; the following cell
# overwrites it with the per-row result of no_of_nulls.
df['okay']=True
In [ ]:
# Call no_of_nulls once per row (axis=1 hands each row to the function as a
# Series) and store the boolean result in the 'okay' column.
df['okay'] = df.apply(no_of_nulls, axis=1)
In [ ]:
# Tally how many rows passed and how many failed the null-count check.
df['okay'].value_counts()
In [ ]:
# Shape (rows, columns) of the subset of rows flagged as okay.
df.loc[df['okay']].shape
In [ ]:
from P4_sandbox import get_data
In [ ]:
# Pass the fan record with index label 357425 to the project helper
# get_image_from_record and keep the result (handed to imshow in the next
# cell). .loc raises KeyError if that label is not among the loaded fan rows.
# NOTE(review): the helper's return type is not visible here — presumably
# image data; confirm.
im = get_data.get_image_from_record(fans.loc[357425 ])
In [ ]:
# Display the fetched image.
# NOTE(review): imshow is not imported in the visible cells — presumably it
# comes from a pylab / matplotlib session namespace; confirm.
imshow(im)
In [ ]:
# Plot a histogram of the 'spread' column for the fan markings.
fans['spread'].hist()
In [ ]:
# Parse the acquisition_date column into datetimes. Item assignment is used
# instead of attribute assignment, which pandas only supports for columns
# that already exist and advises against for setting values.
df['acquisition_date'] = pd.to_datetime(df['acquisition_date'])
In [ ]:
# Display the dtype of every column.
df.dtypes
In [ ]:
# IPython help lookup for df.to_hdf (the trailing `?` is IPython syntax and
# is not valid in a plain Python script).
df.to_hdf?
In [ ]:
# Write the DataFrame to an HDF5 file under the key 'df'.
# NOTE(review): this output filename is dated 2013-10-27, while the CSV path
# in fname is dated 2014-02-02, and df was loaded with an nrows limit —
# confirm this is the intended target file and content.
df.to_hdf('/Users/maye/data/planet4/2013-10-27_planet_four_classifications.h5',
'df')
In [ ]:
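# Unused keyword-argument fragment kept for reference (presumably meant for
# the to_hdf call above — confirm):
# data_columns=['image_id','image_name','user_name','marking'])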