In [1]:
csvfile = "GENdata.csv"

In [3]:
gendata = open(csvfile, 'r')

In [4]:
gendata


Out[4]:
<_io.TextIOWrapper name='GENdata.csv' mode='r' encoding='cp1252'>

In [5]:
memdata = gendata.read()

In [6]:
memdata


Out[6]:
'PatientID,GeneticMarker,TestGroup\n1234,sep13,A\n2345,oct14,B\n98754,nov12,A\n323424,mar02,B\n'

In [7]:
print(memdata)


PatientID,GeneticMarker,TestGroup
1234,sep13,A
2345,oct14,B
98754,nov12,A
323424,mar02,B


In [8]:
import pandas as pd

In [12]:
csvdf = pd.read_csv(csvfile)

In [13]:
csvdf


Out[13]:
PatientID GeneticMarker TestGroup
0 1234 sep13 A
1 2345 oct14 B
2 98754 nov12 A
3 323424 mar02 B

In [16]:
csvdf.TestGroup.astype('category')


Out[16]:
0    A
1    B
2    A
3    B
Name: TestGroup, dtype: category
Categories (2, object): [A, B]

In [17]:
csvhist = csvdf.hist(by=csvdf.TestGroup)

In [18]:
csvdf.describe


Out[18]:
<bound method NDFrame.describe of    PatientID GeneticMarker TestGroup
0       1234         sep13         A
1       2345         oct14         B
2      98754         nov12         A
3     323424         mar02         B>

In [ ]: