In [17]:
__author__ = 'aqeel'
# Note: remove the `%matplotlib inline` line below to get figures in a separate window.
%matplotlib inline
import matplotlib.pyplot as plt
import csv
import numpy as np

In [53]:
# Read the whole CSV into memory as a list of rows (each row is a list of
# strings). The file is opened in binary mode, which is what the Python 2 csv
# module expects; under Python 3 it would need text mode with newline=''.
with open('ncmp_1415_final_non_disclosive.csv', 'rb') as datafile:
    data = list(csv.reader(datafile))

In [54]:
# Split the raw CSV rows into a header array (`col`) and a matrix of data rows.
# `data` is rebound here from a list of rows to an np.matrix, so running this
# cell a second time would treat the first data row as the header and silently
# drop it. The isinstance guard makes the cell safe to re-run: it only acts on
# the freshly loaded list and leaves an already-converted `data` alone.
if isinstance(data, list):
    col = np.array(data[0])     # header row -> 1-D array of column names
    data = np.matrix(data[1:])  # remaining rows; values are kept as strings

In [55]:
# Display the column names taken from the CSV header row.
col


Out[55]:
array(['ncmppseudosystemid', 'genderdescription', 'ageinmonths',
       'schoolyear', 'height', 'heightzscore', 'heightpscore', 'weight',
       'weightzscore', 'weightpscore', 'bmi', 'bmizscore', 'bmipscore',
       'bmipopulationcategory', 'bmiclinicalcategory',
       'schooltier1localauthority', 'schooltier2localauthority',
       'schoolgovernmentofficeregion', 'schoolindexofmultipledepriv',
       'pupilschooldistancebanded', 'suppress_record_high',
       'suppress_record_low', 'suppress_table', 'suppress_imd'], 
      dtype='|S28')

In [134]:
# Draw 5000 row indices uniformly at random from [0, number of rows).
# randint samples with replacement, so the same row can be picked repeatedly.
# NOTE(review): no seed is set in the visible cells, so the sample changes on
# every run.
samplerows = np.random.randint(0, data.shape[0], size=5000)
# Pull out the sampled rows and convert the result to a plain ndarray.
sampledata = np.array(data[samplerows, :])
sampledata.shape


Out[134]:
(5000, 24)

In [95]:
def ColIndex(colname):
    """Return the zero-based position of ``colname`` in the header array ``col``.

    Parameters
    ----------
    colname : str
        Column name to look up; it is compared for equality against the
        entries of the module-level ``col`` array.

    Returns
    -------
    Index of the first entry of ``col`` equal to ``colname``.

    Raises
    ------
    IndexError
        If no entry of ``col`` equals ``colname``.
    """
    matches = np.where(col == colname)[0]
    if len(matches) == 0:
        # Same exception type as the bare [0][0] lookup would raise, but the
        # message names the missing column instead of "index 0 is out of bounds".
        raise IndexError("column %r not found in col" % (colname,))
    return matches[0]

In [83]:
# Keep only the sampled rows whose recorded height is negative.
# The height values must be taken as a column ([:, idx]); indexing with the
# bare integer would select a single *row* instead. The CSV values are stored
# as strings, so they are converted to float before the numeric comparison.
# NOTE(review): astype(float) assumes every height entry parses as a number —
# confirm there are no blank/non-numeric values in this column.
height_values = sampledata[:, ColIndex('height')].astype(float)
negativedata = sampledata[height_values < 0, :]

In [137]:
# Inspect which container type the sample is stored in.
type(sampledata)


Out[137]:
numpy.ndarray

In [152]:
# Display the first column of the sample (position 0 in `col` is
# 'ncmppseudosystemid').
sampledata[:,0]


Out[152]:
array(['758243', '1064518', '396672', ..., '618479', '554158', '1093270'], 
      dtype='|S20')