In [3]:
csvfile = "GENdata.csv"

In [4]:
csvfile


Out[4]:
'GENdata.csv'

In [27]:
gendata = open(csvfile,'r')

In [28]:
gendata


Out[28]:
<_io.TextIOWrapper name='GENdata.csv' mode='r' encoding='UTF-8'>

In [29]:
memdata = gendata.read()

In [30]:
memdata


Out[30]:
'PatientID,GeneticMarker,TestGroup\n1234,sep13,A\n2345,oct14,B\n98754,nov12,A\n323424,mar02,B\n'

In [31]:
import pandas as pd

In [34]:
csvdf = pd.read_csv(csvfile)
csvdf

In [35]:
print csvdf


  File "<ipython-input-35-e486b52a4867>", line 1
    print csvdf
              ^
SyntaxError: Missing parentheses in call to 'print'

In [36]:
print (csvdf)


   PatientID GeneticMarker TestGroup
0       1234         sep13         A
1       2345         oct14         B
2      98754         nov12         A
3     323424         mar02         B

In [37]:
csvdf.hist


Out[37]:
<bound method hist_frame of    PatientID GeneticMarker TestGroup
0       1234         sep13         A
1       2345         oct14         B
2      98754         nov12         A
3     323424         mar02         B>

In [40]:
csvdf.TestGroup.astype('category')


Out[40]:
0    A
1    B
2    A
3    B
Name: TestGroup, dtype: category
Categories (2, object): [A, B]

In [43]:
csvhist = csvdf.hist(by=csvdf.TestGroup)

In [44]:
csvhist


Out[44]:
array([<matplotlib.axes._subplots.AxesSubplot object at 0x1138b09b0>,
       <matplotlib.axes._subplots.AxesSubplot object at 0x114fd4cf8>], dtype=object)

In [45]:
csvdf.describe


Out[45]:
<bound method NDFrame.describe of    PatientID GeneticMarker TestGroup
0       1234         sep13         A
1       2345         oct14         B
2      98754         nov12         A
3     323424         mar02         B>

In [ ]: