In [1]:
from __future__ import division
from pandas import *
import os, os.path
import csv
from itertools import product
import numpy as np
import matplotlib.pyplot as plt

os.chdir('/home/will/Tip60Data/')

In [2]:
microdata = read_csv('microdata.tsv', sep='\t', index_col = 0)
microdata.columns = MultiIndex.from_tuples([col.split('_') for col in microdata.columns], names = ['Experiment', 'Run'])
agg_microdata = microdata.groupby(level = 'Experiment', axis = 1).mean()

In [3]:
check_cols = ['dTIP60E431Q', 'dTIP60WT']
fold_changes = agg_microdata.copy()
for col in check_cols:
    fold_changes[col] = agg_microdata[col]/agg_microdata['Control']
    
fold_changes = fold_changes.drop(['Control'], axis = 1).applymap(np.log2)

In [4]:
microdata


Out[4]:
<class 'pandas.core.frame.DataFrame'>
Index: 22636 entries, NM_001007095.2_at to NR_048526.1_at
Data columns:
('Control', '1')    22636  non-null values
('Control', '2')    22636  non-null values
('dTIP60E431Q', '1')22636  non-null values
('dTIP60E431Q', '2')22636  non-null values
('dTIP60WT', '1')   22636  non-null values
('dTIP60WT', '2')   22636  non-null values
dtypes: float64(6)

In [5]:
from scipy.stats import norm, ttest_1samp
fold_changes.plot(kind = 'kde')
plt.title('KS Density of fold-changes')


Out[5]:
<matplotlib.text.Text at 0x4025990>

In [6]:
pvals = fold_changes.copy()
for col in check_cols:
    mval = fold_changes[col].mean()
    stdval = fold_changes[col].std()
    pvals[col] = 1 - norm.cdf(abs(fold_changes[col]), loc=mval, scale = stdval)

In [7]:
cut = 0.05/len(pvals.index)
good_mask = (pvals<cut).any(axis = 1)
print pvals[good_mask].to_string()


                    dTIP60E431Q      dTIP60WT
RefSeq Genes                                 
NM_001014489.2_at  2.356172e-02  4.055312e-12
NM_001031943.2_at  3.833501e-08  2.203379e-01
NM_001042847.2_at  2.273285e-01  1.351625e-10
NM_001043100.2_at  8.350605e-08  5.585120e-02
NM_001043144.1_at  5.481837e-12  5.072655e-10
NM_001103387.1_at  2.990798e-09  4.257876e-01
NM_001103616.1_at  2.193840e-06  1.457361e-03
NM_001103691.2_at  8.163893e-07  3.011441e-01
NM_001103928.1_at  4.012457e-12  1.334510e-01
NM_001104314.1_at  4.487533e-10  2.462526e-01
NM_001144423.2_at  1.335637e-07  4.256002e-01
NM_001144585.1_at  2.926381e-08  5.280150e-01
NM_001144751.1_at  1.367163e-01  1.718539e-06
NM_001169156.1_at  2.990798e-09  4.257876e-01
NM_001169518.1_at  2.008583e-07  3.381436e-01
NM_001170058.1_at  2.959241e-09  1.237294e-01
NM_001201799.1_at  8.063083e-07  1.587422e-02
NM_001201830.1_at  4.529393e-01  3.354802e-10
NM_001201871.1_at  2.037486e-07  5.515679e-09
NM_001201917.1_at  2.356172e-02  4.055312e-12
NM_001259447.1_at  6.809827e-05  3.996803e-14
NM_001259735.1_at  1.791859e-07  1.224591e-01
NM_001259896.1_at  1.304544e-07  3.929592e-01
NM_057232.4_at     2.055494e-06  1.487686e-01
NM_057444.3_at     6.440473e-07  1.793064e-02
NM_057596.3_at     1.963133e-08  1.701901e-01
NM_057930.3_at     5.637897e-04  6.898937e-10
NM_057983.4_at     4.028546e-10  4.652948e-03
NM_078583.2_at     0.000000e+00  3.943396e-01
NM_078794.3_at     1.689416e-01  0.000000e+00
NM_078798.4_at     0.000000e+00  3.033905e-01
NM_078979.3_at     2.629112e-01  4.560506e-10
NM_079005.4_at     1.982322e-02  0.000000e+00
NM_079020.4_at     3.889064e-02  0.000000e+00
NM_079021.4_at     2.910566e-08  6.220580e-13
NM_079025.4_at     3.793505e-01  2.446226e-08
NM_079028.2_at     2.093296e-01  0.000000e+00
NM_079063.3_at     4.185310e-01  1.734091e-10
NM_079143.2_at     3.871533e-07  2.834844e-01
NM_079144.4_at     5.480438e-09  5.264376e-01
NM_079157.2_at     7.764533e-02  6.533492e-08
NM_079177.3_at     3.844084e-04  0.000000e+00
NM_079397.2_at     3.735615e-07  1.813218e-01
NM_079667.3_at     3.644359e-02  0.000000e+00
NM_080077.2_at     0.000000e+00  3.780136e-01
NM_080175.2_at     3.390765e-11  2.908292e-02
NM_080275.2_at     2.530198e-05  2.631229e-14
NM_130535.2_at     1.298192e-08  3.230222e-01
NM_130687.2_at     4.254584e-02  0.000000e+00
NM_130688.3_at     8.135292e-06  3.223885e-08
NM_131923.3_at     1.188023e-10  0.000000e+00
NM_131984.1_at     2.215918e-02  3.038787e-07
NM_131995.1_at     4.773416e-03  2.664535e-15
NM_132034.1_at     4.197962e-07  2.085042e-02
NM_132038.2_at     3.802571e-07  6.174109e-02
NM_132341.3_at     3.807480e-01  2.066508e-07
NM_132525.2_at     1.773647e-06  2.438244e-01
NM_132571.1_at     1.510675e-09  7.868153e-10
NM_132713.2_at     1.931419e-03  7.986585e-08
NM_132723.2_at     3.596828e-01  9.301443e-07
NM_132749.1_at     6.337592e-03  9.734866e-10
NM_132756.2_at     5.020939e-01  0.000000e+00
NM_133117.2_at     7.289934e-05  0.000000e+00
NM_134577.1_at     1.367163e-01  1.718539e-06
NM_134601.2_at     2.262039e-01  6.639951e-07
NM_134874.3_at     2.861012e-07  3.519407e-13
NM_135030.2_at     2.193840e-06  1.457361e-03
NM_135313.2_at     4.343668e-09  7.771561e-16
NM_135329.2_at     4.330343e-01  3.219647e-15
NM_135391.2_at     1.006245e-04  0.000000e+00
NM_135456.3_at     4.529393e-01  3.354802e-10
NM_135590.1_at     4.792157e-08  1.689561e-03
NM_135844.3_at     2.037486e-07  5.515679e-09
NM_135869.1_at     5.058593e-06  2.035039e-13
NM_135952.1_at     2.008583e-07  3.381436e-01
NM_136145.3_at     3.267393e-01  9.420130e-10
NM_136240.2_at     1.923795e-06  4.479278e-01
NM_136679.4_at     2.408938e-01  0.000000e+00
NM_136685.2_at     6.683747e-03  1.196820e-13
NM_136807.1_at     4.692797e-07  4.608663e-07
NM_136841.2_at     3.936678e-10  4.308596e-03
NM_137057.2_at     4.461884e-05  6.360726e-08
NM_137115.2_at     1.940670e-13  4.153295e-01
NM_137158.3_at     1.171421e-06  1.699542e-05
NM_137176.2_at     6.171109e-03  1.029626e-09
NM_137316.3_at     6.809827e-05  3.996803e-14
NM_137361.3_at     2.512684e-06  1.145334e-08
NM_137475.2_at     1.105960e-01  0.000000e+00
NM_137559.1_at     3.716496e-01  2.220446e-16
NM_137574.2_at     2.279996e-03  0.000000e+00
NM_137608.2_at     2.993916e-11  0.000000e+00
NM_137617.3_at     1.141288e-06  4.308357e-01
NM_137639.1_at     7.676309e-08  6.428752e-02
NM_137720.3_at     8.350605e-08  5.585120e-02
NM_137738.2_at     2.158045e-06  5.078140e-01
NM_137981.2_at     4.455170e-03  1.882435e-10
NM_138241.3_at     9.508541e-07  5.075629e-01
NM_139618.3_at     4.373576e-03  3.418604e-07
NM_139633.2_at     1.341071e-07  2.084582e-01
NM_139649.2_at     5.587037e-07  3.507513e-02
NM_140154.2_at     1.588037e-04  2.383967e-08
NM_140163.2_at     3.358859e-07  0.000000e+00
NM_140165.1_at     2.942591e-04  0.000000e+00
NM_140181.1_at     2.553775e-10  1.693416e-01
NM_140273.2_at     9.212520e-11  5.053261e-01
NM_140352.2_at     1.432445e-02  7.027712e-13
NM_140603.1_at     3.033122e-04  3.302802e-12
NM_140615.2_at     2.296140e-07  6.606651e-06
NM_140621.1_at     5.481837e-12  5.072655e-10
NM_140646.3_at     1.860773e-06  1.111205e-01
NM_140660.3_at     4.977585e-01  2.729705e-11
NM_140765.3_at     8.167295e-02  6.903345e-11
NM_140776.2_at     4.973171e-07  3.004387e-01
NM_140777.2_at     9.895291e-08  4.825707e-01
NM_140805.1_at     1.304544e-07  3.929592e-01
NM_141052.2_at     7.261217e-08  1.566699e-01
NM_141088.1_at     4.525494e-03  4.299894e-13
NM_141097.3_at     3.254931e-08  1.890024e-01
NM_141098.2_at     6.304797e-09  4.188862e-02
NM_141099.2_at     6.933233e-10  2.708588e-01
NM_141103.3_at     2.035245e-09  1.142810e-01
NM_141186.3_at     1.519759e-06  4.634838e-01
NM_141189.1_at     1.275032e-07  3.052540e-09
NM_141364.1_at     9.660518e-09  4.653146e-06
NM_141368.2_at     5.632828e-12  0.000000e+00
NM_141371.2_at     6.289524e-12  2.447679e-04
NM_141379.1_at     1.120649e-03  0.000000e+00
NM_141383.1_at     1.941259e-08  1.249429e-01
NM_141384.3_at     2.183006e-06  3.457451e-02
NM_141420.3_at     1.965321e-06  4.424078e-04
NM_142057.2_at     1.642338e-07  7.828557e-03
NM_142115.1_at     2.007161e-11  3.472339e-03
NM_142187.2_at     2.926381e-08  5.280150e-01
NM_142347.2_at     2.122845e-06  2.986960e-01
NM_142349.2_at     1.185149e-05  0.000000e+00
NM_142370.1_at     1.984570e-04  1.086850e-08
NM_142380.3_at     4.011546e-09  3.108416e-01
NM_142382.2_at     4.923609e-10  2.270048e-01
NM_142881.3_at     1.586629e-06  2.725607e-04
NM_142918.2_at     2.751782e-08  4.946202e-01
NM_142963.3_at     7.897005e-11  2.880920e-01
NM_142985.4_at     1.225356e-06  7.879686e-06
NM_143112.2_at     1.653904e-06  3.776616e-02
NM_143229.1_at     1.625380e-07  1.807443e-02
NM_143233.2_at     4.872326e-08  6.015597e-03
NM_143237.3_at     2.179724e-07  4.080528e-01
NM_143456.2_at     3.632250e-11  4.374908e-01
NM_143527.2_at     2.743180e-08  1.543248e-01
NM_143635.1_at     6.229535e-07  8.881784e-16
NM_143761.3_at     8.276158e-12  0.000000e+00
NM_144109.1_at     2.974029e-03  2.285454e-10
NM_144111.2_at     1.562655e-05  0.000000e+00
NM_144191.4_at     1.791859e-07  1.224591e-01
NM_144196.2_at     3.037068e-07  2.728767e-01
NM_144368.2_at     1.128788e-06  4.901860e-01
NM_144379.3_at     4.841584e-02  7.272382e-07
NM_144380.2_at     1.488918e-01  0.000000e+00
NM_164601.1_at     2.193840e-06  1.457361e-03
NM_164602.1_at     2.193840e-06  1.457361e-03
NM_164740.2_at     1.800838e-02  1.020003e-10
NM_164888.3_at     1.770474e-01  3.377187e-12
NM_165165.1_at     2.008583e-07  3.381436e-01
NM_165362.1_at     1.747354e-03  0.000000e+00
NM_165363.1_at     1.747354e-03  0.000000e+00
NM_166277.2_at     4.561179e-03  1.675994e-08
NM_166512.2_at     1.226739e-07  3.399438e-01
NM_166957.1_at     4.789260e-02  7.236615e-10
NM_167061.2_at     5.042538e-03  2.576828e-13
NM_167540.3_at     5.534107e-06  2.512070e-07
NM_168019.3_at     4.862777e-14  3.389933e-02
NM_168078.1_at     1.341071e-07  2.084582e-01
NM_168079.1_at     1.341071e-07  2.084582e-01
NM_168244.2_at     8.111855e-02  0.000000e+00
NM_168288.2_at     1.469648e-02  4.488694e-10
NM_168396.1_at     3.404407e-02  6.398869e-07
NM_168441.1_at     1.128722e-06  2.464024e-01
NM_168657.1_at     3.033122e-04  3.302802e-12
NM_168795.3_at     6.167814e-07  1.092393e-07
NM_168796.3_at     6.167814e-07  1.092393e-07
NM_168797.2_at     6.167814e-07  1.092393e-07
NM_169130.1_at     9.660518e-09  4.653146e-06
NM_169766.2_at     4.866276e-08  3.329687e-08
NM_170211.2_at     5.935881e-07  4.464565e-01
NM_170415.4_at     3.807880e-02  9.898543e-10
NM_176222.1_at     3.478619e-08  4.015223e-01
NM_176359.1_at     1.032578e-06  2.064449e-01
NM_206133.3_at     1.515695e-04  2.080534e-10
NM_206350.2_at     1.302702e-11  1.501247e-02

In [8]:
from operator import methodcaller
from itertools import groupby
from Bio.Seq import Seq
from Bio import Motif
from StringIO import StringIO

def yield_motifs():
    motifdir = '/home/will/Tip60Data/TFdata/'
    with open(motifdir + 'matrix_only.txt') as handle:
        for key, lines in groupby(handle, methodcaller('startswith', '>')):
            if key:
                name = lines.next().strip().split()[-1].lower()
            else:
                tmp = ''.join(lines)
                mot = Motif.read(StringIO(tmp), 'jaspar-pfm')
                yield name, mot
                yield name+'-R', mot.reverse_complement()

            
pwm_dict = {}
for num, (name, mot) in enumerate(yield_motifs()):
    if num % 100 == 0:
        print num
    thresh = Motif.Thresholds.ScoreDistribution(mot, precision = 50).threshold_fpr(0.0001)
    pwm_dict[name] = (mot, thresh)


0
100
200

In [10]:
from itertools import imap
from operator import itemgetter
def unique_justseen(iterable, key=None):
    "List unique elements, preserving order. Remember only the element just seen."
    # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B
    # unique_justseen('ABBCcAD', str.lower) --> A B C A D
    return imap(next, imap(itemgetter(1), groupby(iterable, key)))

def take(n, iterable):
    "Return first n items of the iterable as a list"
    return list(islice(iterable, n))

In [11]:
def scan_seqs(tup):
    pwm_tup, row = tup
    seq = Seq(row['Seq'])
    start = int(row['Start'])
    ch = row['Chromosome']
    strand = row['Strand']
    name, (mot, thresh) = pwm_tup
    results = []
    for loc, m in mot.search_pwm(seq, threshold=thresh):
        results.append((name, ch, start+loc, start+loc+len(mot), strand))
    
    return results

In [17]:
from concurrent.futures import ProcessPoolExecutor, as_completed
from functools import partial
from itertools import islice

interval_fields = ['Chromosome','Start','Stop','Refseq','Junk1','Strand','Junk2','Junk3','Junk4','Junk5','Junk6','Junk7','Seq']

with open('seqdata/PromoterSeqdata.interval') as handle:
    with open('seqdata/TFBindingPos.interval', 'w') as ohandle:
        reader = csv.DictReader(handle, delimiter = '\t', fieldnames=interval_fields)
        nreader = unique_justseen(reader, key = lambda x: x['Seq'])
        
        writer = csv.writer(ohandle, delimiter = '\t')
        writer.writerow(['TFname', 'Chr', 'Start', 'Stop', 'strand'])
        
        check_items = product(sorted(pwm_dict.items()), nreader)
        blocksize = 50000
        with ProcessPoolExecutor(max_workers = 30) as executor:
            block = take(blocksize, check_items)
            start = 0
            while block:
                res = executor.map(scan_seqs, block)
                for r in res:
                    if r:
                        writer.writerows(r)
                block = take(blocksize, check_items)    
                start += blocksize
                num = int(start/len(pwm_dict))
                print num


192
384
576
769
961
1153
1346
1538
1730
1923
2115
2307
2500
2692
2884
3076
3269
3461
3653
3846
4038
4230
4423
4615
4807
5000
5192
5384
5576
5769
5961
6153
6346
6538
6730
6923
7115
7307
7500
7692
7884
8076
8269
8461
8653
8846
9038
9230
9423
9615
9807
10000
10192
10384
10576
10769
10961
11153
11346
11538
11730
11923
12115
12307
12500
12692
12884
13076
13269
13461
13653
13846
14038
14230
14423
14615
14807
15000
15192
15384
15576
15769
15961
16153
16346
16538
16730
16923
17115
17307
17500
17692
17884
18076
18269
18461
18653
18846
19038
19230
19423
19615
19807
20000
20192
20384
20576
20769
20961
21153
21346
21538
21730
21923
22115
22307
22500
22692
22884
23076
23269
23461
23653
23846
24038
24230
24423

In [24]:
def make_filename(inp):
    return inp.replace(' ', '-').replace(':', '-')


with open('seqdata/TFBindingPos.interval') as handle:
    reader = csv.reader(handle, delimiter = '\t')
    junk = reader.next()
    grouper = lambda x: x[0]
    for key, rows in groupby(reader, key = grouper):
        fname = make_filename(key)
        with open('seqdata/TFhits/'+fname + '.bed', 'a') as ohandle:
            writer = csv.writer(ohandle, delimiter = '\t')
            for row in rows:
                if (int(row[2]) > 0) and (int(row[3]) > 0):
                    writer.writerow(row[1:])

In [23]:
wanted_genes = set([n.split('.')[0] for n in pvals[good_mask].index])

with open('seqdata/PromoterInervals') as handle:
    with open('seqdata/SigGenePromoters.bed', 'w') as ohandle:
        reader = csv.reader(handle, delimiter = '\t')
        writer = csv.writer(ohandle, delimiter = '\t')
        for row in reader:
            if row[3] in wanted_genes:
                print row[3]
                wanted = [row[0], row[1], row[2], row[5]]
                writer.writerow(wanted)


NM_001103387
NM_001169156
NM_057444
NM_130535
NM_130688
NM_166957
NM_130687
NM_057596
NM_131923
NM_131984
NM_131995
NM_132038
NM_132034
NM_167061
NM_132341
NM_132525
NM_132571
NM_078583
NM_132723
NM_132713
NM_132756
NM_132749
NM_167540
NM_133117
NM_134577
NM_001144751
NM_134601
NM_001042847
NM_135952
NM_165165
NM_001169518
NM_165363
NM_165362
NM_134874
NM_144380
NM_164602
NM_001103616
NM_135030
NM_164601
NM_164740
NM_135313
NM_144379
NM_135329
NM_001201799
NM_135391
NM_078794
NM_078798
NM_135456
NM_001201830
NM_164888
NM_135590
NM_135844
NM_001201871
NM_135869
NM_001103691
NM_001201917
NM_001014489
NM_136145
NM_136240
NM_136685
NM_136679
NM_136807
NM_136841
NM_078979
NM_079005
NM_137057
NM_137115
NM_079021
NM_079020
NM_143761
NM_079025
NM_137158
NM_137176
NM_079028
NM_206133
NM_137316
NM_001259447
NM_137361
NM_144111
NM_144109
NM_166277
NM_137475
NM_176222
NM_079063
NM_137559
NM_137574
NM_137608
NM_137617
NM_137639
NM_001043100
NM_137720
NM_137738
NM_001103928
NM_166512
NM_137981
NM_079143
NM_079144
NM_079157
NM_138241
NM_168019
NM_079177
NM_057232
NM_139633
NM_168079
NM_168078
NM_139618
NM_139649
NM_057930
NM_144196
NM_001144423
NM_168244
NM_144191
NM_001259735
NM_168288
NM_001031943
NM_168396
NM_140154
NM_140163
NM_140165
NM_168441
NM_140181
NM_080275
NM_140273
NM_080077
NM_140352
NM_206350
NM_140603
NM_001043144
NM_140621
NM_168657
NM_140615
NM_140646
NM_140660
NM_079397
NM_140765
NM_140776
NM_140777
NM_001259896
NM_140805
NM_176359
NM_168796
NM_168795
NM_168797
NM_141052
NM_141099
NM_141097
NM_141098
NM_141103
NM_141088
NM_141186
NM_141189
NM_141364
NM_169130
NM_141368
NM_141371
NM_141384
NM_141383
NM_001170058
NM_141379
NM_141420
NM_144368
NM_080175
NM_057983
NM_142057
NM_001104314
NM_142115
NM_001144585
NM_142187
NM_142347
NM_142349
NM_142370
NM_079667
NM_142380
NM_169766
NM_142382
NM_142881
NM_142918
NM_142963
NM_142985
NM_143112
NM_170211
NM_143237
NM_143229
NM_143233
NM_170415
NM_143456
NM_143527
NM_143635

In [22]:
wanted_genes


Out[22]:
set(['NM_140273.2_at',
     'NM_136685.2_at',
     'NM_164888.3_at',
     'NM_143237.3_at',
     'NM_140615.2_at',
     'NM_169130.1_at',
     'NM_001201799.1_at',
     'NM_137720.3_at',
     'NM_142881.3_at',
     'NM_139649.2_at',
     'NM_001259896.1_at',
     'NM_057596.3_at',
     'NM_137738.2_at',
     'NM_078794.3_at',
     'NM_001031943.2_at',
     'NM_132723.2_at',
     'NM_001144751.1_at',
     'NM_176222.1_at',
     'NM_140660.3_at',
     'NM_142382.2_at',
     'NM_079157.2_at',
     'NM_137176.2_at',
     'NM_176359.1_at',
     'NM_141099.2_at',
     'NM_142347.2_at',
     'NM_001170058.1_at',
     'NM_143527.2_at',
     'NM_079397.2_at',
     'NM_001103691.2_at',
     'NM_079063.3_at',
     'NM_141383.1_at',
     'NM_142187.2_at',
     'NM_137115.2_at',
     'NM_079667.3_at',
     'NM_168441.1_at',
     'NM_141098.2_at',
     'NM_167540.3_at',
     'NM_057232.4_at',
     'NM_143112.2_at',
     'NM_001042847.2_at',
     'NM_168079.1_at',
     'NM_079028.2_at',
     'NM_137559.1_at',
     'NM_136240.2_at',
     'NM_144379.3_at',
     'NM_144111.2_at',
     'NM_140776.2_at',
     'NM_138241.3_at',
     'NM_141189.1_at',
     'NM_135030.2_at',
     'NM_141379.1_at',
     'NM_165362.1_at',
     'NM_080175.2_at',
     'NM_130688.3_at',
     'NM_206350.2_at',
     'NM_166277.2_at',
     'NM_137475.2_at',
     'NM_137316.3_at',
     'NM_135391.2_at',
     'NM_078798.4_at',
     'NM_139618.3_at',
     'NM_135590.1_at',
     'NM_079177.3_at',
     'NM_140765.3_at',
     'NM_142370.1_at',
     'NM_135952.1_at',
     'NM_137981.2_at',
     'NM_001103616.1_at',
     'NM_078583.2_at',
     'NM_001104314.1_at',
     'NM_131995.1_at',
     'NM_001043100.2_at',
     'NM_140777.2_at',
     'NM_137057.2_at',
     'NM_168797.2_at',
     'NM_144196.2_at',
     'NM_001043144.1_at',
     'NM_141384.3_at',
     'NM_001169518.1_at',
     'NM_142057.2_at',
     'NM_143233.2_at',
     'NM_137574.2_at',
     'NM_132034.1_at',
     'NM_080275.2_at',
     'NM_141371.2_at',
     'NM_168288.2_at',
     'NM_137617.3_at',
     'NM_132749.1_at',
     'NM_132341.3_at',
     'NM_132038.2_at',
     'NM_131923.3_at',
     'NM_131984.1_at',
     'NM_168657.1_at',
     'NM_166512.2_at',
     'NM_164601.1_at',
     'NM_206133.3_at',
     'NM_134577.1_at',
     'NM_142985.4_at',
     'NM_079025.4_at',
     'NM_078979.3_at',
     'NM_168795.3_at',
     'NM_140646.3_at',
     'NM_165363.1_at',
     'NM_001103387.1_at',
     'NM_001201871.1_at',
     'NM_001169156.1_at',
     'NM_079144.4_at',
     'NM_001144585.1_at',
     'NM_080077.2_at',
     'NM_057983.4_at',
     'NM_143456.2_at',
     'NM_136145.3_at',
     'NM_136807.1_at',
     'NM_144191.4_at',
     'NM_057444.3_at',
     'NM_137158.3_at',
     'NM_166957.1_at',
     'NM_141186.3_at',
     'NM_135869.1_at',
     'NM_168396.1_at',
     'NM_168796.3_at',
     'NM_001144423.2_at',
     'NM_133117.2_at',
     'NM_079021.4_at',
     'NM_170211.2_at',
     'NM_001201830.1_at',
     'NM_141103.3_at',
     'NM_079005.4_at',
     'NM_140163.2_at',
     'NM_168019.3_at',
     'NM_135313.2_at',
     'NM_135844.3_at',
     'NM_140181.1_at',
     'NM_139633.2_at',
     'NM_132713.2_at',
     'NM_144109.1_at',
     'NM_164602.1_at',
     'NM_132571.1_at',
     'NM_141420.3_at',
     'NM_001014489.2_at',
     'NM_142918.2_at',
     'NM_142115.1_at',
     'NM_130535.2_at',
     'NM_144368.2_at',
     'NM_170415.4_at',
     'NM_134601.2_at',
     'NM_142380.3_at',
     'NM_001201917.1_at',
     'NM_079020.4_at',
     'NM_142963.3_at',
     'NM_079143.2_at',
     'NM_140154.2_at',
     'NM_142349.2_at',
     'NM_141364.1_at',
     'NM_164740.2_at',
     'NM_132525.2_at',
     'NM_140805.1_at',
     'NM_144380.2_at',
     'NM_057930.3_at',
     'NM_137361.3_at',
     'NM_137639.1_at',
     'NM_168244.2_at',
     'NM_143229.1_at',
     'NM_168078.1_at',
     'NM_001259447.1_at',
     'NM_135329.2_at',
     'NM_132756.2_at',
     'NM_141088.1_at',
     'NM_136841.2_at',
     'NM_140621.1_at',
     'NM_140603.1_at',
     'NM_130687.2_at',
     'NM_134874.3_at',
     'NM_165165.1_at',
     'NM_140165.1_at',
     'NM_141368.2_at',
     'NM_135456.3_at',
     'NM_136679.4_at',
     'NM_137608.2_at',
     'NM_141052.2_at',
     'NM_001103928.1_at',
     'NM_140352.2_at',
     'NM_143635.1_at',
     'NM_169766.2_at',
     'NM_167061.2_at',
     'NM_001259735.1_at',
     'NM_143761.3_at',
     'NM_141097.3_at'])

In [ ]: