In [27]:
import os
import sys

# Relative path of the directory that holds the flounder data files.
dirname_data = '../../data/Flounder'

# Full path of the liver RPKM table that read_rpkm() parses.
filename_rpkmA = os.path.join(
    dirname_data,
    'Kwon201608_KoreanFlounder_Liver.rpkm.txt',
)

def read_rpkm(filename):
    """Load an RPKM table from a text file into a nested dictionary.

    The first line is treated as the header and is split on tab characters;
    its first field labels the gene-ID column. Every following non-blank
    line is split on whitespace into a gene ID followed by one numeric
    value per remaining header column.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict mapping each gene ID to a dict that maps every header name
        to its float value, plus a ``'mean'`` key holding the arithmetic
        mean of that row's values.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a value field cannot be converted to float.
        IndexError: If a row has more fields than the header.
        ZeroDivisionError: If a row contains a gene ID but no values.
    """
    rv = dict()

    # 'with' guarantees the handle is closed even if parsing raises midway.
    with open(filename, 'r') as f:
        headers = f.readline().strip().split("\t")

        for line in f:
            tokens = line.strip().split()
            if not tokens:
                # Blank line (e.g. a trailing empty line at end of file):
                # there is no gene ID to read, so skip it instead of crashing.
                continue

            gene_id = tokens[0]
            values = dict()

            # Explicit running sum (rather than sum()) keeps the exact
            # left-to-right floating-point accumulation order.
            sum_rpkm = 0.0
            for i in range(1, len(tokens)):
                value = float(tokens[i])
                values[headers[i]] = value
                sum_rpkm += value
            values['mean'] = sum_rpkm / (len(tokens) - 1)

            rv[gene_id] = values
    return rv

# Parse the liver RPKM table: {gene_id: {header_name: value, ..., 'mean': row mean}}.
rpkmA = read_rpkm(filename_rpkmA)

In [29]:
%matplotlib inline

import math
import matplotlib.pyplot as plt

geneListA = sorted(rpkmA.keys())
# NOTE: the per-gene dict also carries the derived 'mean' key, so it shows up
# in this list. Indices 0-2 are the individual samples only while their names
# sort before 'mean' (true for 'LiverA'/'LiverB'/'LiverC').
sampleListA = sorted(rpkmA[ geneListA[0] ].keys())

print(sampleListA)

# Small offset added before log10 so that zero values do not raise a math
# domain error.
LOG_PSEUDOCOUNT = 0.001

# The x-series (log10 of the per-gene mean) is shared by every sample,
# so compute it once instead of once per plot call.
log_mean = [math.log10(rpkmA[x]['mean'] + LOG_PSEUDOCOUNT) for x in geneListA]

fig = plt.figure(figsize=(6,6))
ax1 = fig.add_subplot(1,1,1)

# Scatter each of the first three columns against the per-gene mean,
# one marker colour per column (black, red, blue).
for sample_idx, marker_fmt in enumerate(('k.', 'r.', 'b.')):
    sample_name = sampleListA[sample_idx]
    log_sample = [math.log10(rpkmA[x][sample_name] + LOG_PSEUDOCOUNT) for x in geneListA]
    ax1.plot(log_mean, log_sample, marker_fmt, label=sample_name)

ax1.set_xlabel('log10(mean RPKM + %g)' % LOG_PSEUDOCOUNT)
ax1.set_ylabel('log10(sample RPKM + %g)' % LOG_PSEUDOCOUNT)
ax1.legend(loc='best')
plt.show()


['LiverA', 'LiverB', 'LiverC', 'mean']

In [ ]: