In [1]:
from __future__ import division
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np

from types import ListType
from itertools import combinations, groupby, islice, imap
from collections import Counter
from subprocess import check_output
from operator import itemgetter
from StringIO import StringIO
import csv
import shlex
import sys
import glob
sys.path.append('/home/will/PySeqUtils/')
sys.path.append('/home/will/DeepPipeline/AnalysisCode/')
from GeneralSeqTools import fasta_reader, fasta_writer
import HapReconTools

In [2]:
# Haplotype reconstruction for every sample; the per-sample result of
# HapReconTools.estimate_freqs is collected in hap_dict, keyed by sample name.
base_path = '/home/will/DeepPipeline/Data/ShoRAHruns/%s/tmp.bam'
files = [
    'DrexelMed.A0017.lastz',
    'DrexelMed.A0017.R02.lastz',
    'DrexelMed.A0107.lastz',
    'DrexelMed.A0107.R02.lastz',
]
hap_dict = {}

for name in files:
    print('working on %s' % name)
    fname = base_path % name

    print('getting reads')
    # Materialise the reads, then order them by element 1 of each record.
    reads = list(HapReconTools.read_from_bam(fname))
    reads.sort(key=itemgetter(1))
    ir_reads = list(HapReconTools.yield_IR_reads(reads))

    print('generating graph')
    graph = HapReconTools.generate_hap_graph(ir_reads)

    print('generating haplotypes')
    paths = HapReconTools.simple_paths(graph)
    out_haps = HapReconTools.assemble_haps(paths, graph)

    print('estimating frequencies')
    # NOTE(review): the third positional argument (0.001) is presumably a
    # frequency cutoff -- confirm against HapReconTools.estimate_freqs.
    hap_dict[name] = HapReconTools.estimate_freqs(
        reads, out_haps, 0.001, tol=0.1)


working on DrexelMed.A0017.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 40575
10 -14171.7300964
20 -13690.5863151
30 -13426.3263756
40 -13248.3149243
50 -13116.8359629
60 -13014.932575
70 -12933.638408
80 -12867.5133033
90 -12812.91961
100 -12767.2737122
110 -12728.6782501
120 -12695.7116275
130 -12667.2920173
140 -12642.5843388
150 -12620.9355775
160 -12601.8294451
170 -12584.8540677
180 -12569.6783552
190 -12556.0342507
200 -12543.7031319
210 -12532.5052905
220 -12522.2917792
230 -12512.9381076
240 -12504.3393823
250 -12496.4065617
260 -12489.0635605
270 -12482.2449922
280 -12475.8943862
290 -12469.9627603
300 -12464.40746
310 -12459.1912016
320 -12454.2812767
330 -12449.6488864
340 -12445.2685812
350 -12441.1177896
360 -12437.1764211
370 -12433.426532
380 -12429.8520427
390 -12426.4385003
400 -12423.1728772
410 -12420.0434006
420 -12417.0394073
430 -12414.1512202
440 -12411.3700416
450 -12408.6878619
460 -12406.0973797
470 -12403.5919329
480 -12401.1654373
490 -12398.8123333
500 -12396.5275379
510 -12394.3064026
520 -12392.1446748
530 -12390.0384638
540 -12387.9842096
550 -12385.9786551
560 -12384.0188199
570 -12382.1019775
580 -12380.2256334
590 -12378.3875057
600 -12376.5855073
610 -12374.8177288
620 -12373.0824237
630 -12371.3779941
640 -12369.7029779
650 -12368.0560366
660 -12366.4359446
670 -12364.8415788
680 -12363.2719094
690 -12361.7259911
700 -12360.2029553
710 -12358.7020029
720 -12357.2223972
730 -12355.763458
740 -12354.3245559
750 -12352.905107
760 -12351.5045685
770 -12350.1224339
780 -12348.7582295
790 -12347.4115106
800 -12346.0818586
810 -12344.7688781
820 -12343.4721944
830 -12342.1914513
840 -12340.9263097
850 -12339.6764456
860 -12338.4415491
870 -12337.2213235
880 -12336.0154843
890 -12334.823759
900 -12333.6458864
910 -12332.4816166
920 -12331.3307106
930 -12330.1929404
940 -12329.0680889
950 -12327.9559497
960 -12326.856327
970 -12325.7690355
980 -12324.6939002
990 -12323.6307559
1000 -12322.579447
1010 -12321.5398271
1020 -12320.5117583
1030 -12319.4951109
1040 -12318.4897627
1 exluding 40469 haps but keeping 106
10 -3374.78367414
20 -3351.89039641
30 -3347.13402958
working on DrexelMed.A0017.R02.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 8020
10 -5958.07264756
20 -5779.26181096
30 -5678.71747952
40 -5613.00718271
50 -5566.38122946
60 -5531.26238756
70 -5503.56480834
80 -5480.90762176
90 -5461.82292755
100 -5445.36975915
110 -5430.92780608
120 -5418.07778075
130 -5406.52817189
140 -5396.06921588
150 -5386.54380303
160 -5377.82920306
170 -5369.82571594
180 -5362.4497004
190 -5355.62931985
200 -5349.30194278
210 -5343.41252896
220 -5337.91258598
230 -5332.75944134
240 -5327.9156773
250 -5323.34864165
260 -5319.02998995
270 -5314.93524101
280 -5311.04334254
290 -5307.33625113
300 -5303.79853345
310 -5300.41699521
320 -5297.18034248
330 -5294.07887833
340 -5291.10423556
350 -5288.2491446
360 -5285.50723486
370 -5282.87286682
380 -5280.34099179
390 -5277.90703634
400 -5275.56680812
410 -5273.31642022
420 -5271.15223126
430 -5269.07079874
440 -5267.06884341
450 -5265.14322265
460 -5263.29091115
470 -5261.50898734
480 -5259.79462438
490 -5258.14508443
500 -5256.55771552
510 -5255.02995005
520 -5253.55930446
530 -5252.14337955
540 -5250.779861
550 -5249.46651997
560 -5248.20121344
570 -5246.98188424
580 -5245.80656067
590 -5244.67335569
600 -5243.58046567
610 -5242.52616868
620 -5241.50882245
1 exluding 7761 haps but keeping 259
10 -2868.82316976
20 -2863.61352402
30 -2861.453353
40 -2860.03861888
2 exluding 7763 haps but keeping 257
10 -2864.70501113
20 -2860.98953807
working on DrexelMed.A0107.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 16014
10 -8560.35748645
20 -8331.12775169
30 -8203.4886651
40 -8118.70049142
50 -8057.42635232
60 -8010.40791974
70 -7972.63160377
80 -7941.24465274
90 -7914.55544106
100 -7891.50878621
110 -7871.40401199
120 -7853.74337898
130 -7838.15076891
140 -7824.32776319
150 -7812.02943765
160 -7801.05050709
170 -7791.2168749
180 -7782.37999312
190 -7774.41270947
200 -7767.20596233
210 -7760.66603317
220 -7754.71222617
230 -7749.27490668
240 -7744.2938494
250 -7739.71685231
260 -7735.49857467
270 -7731.59956133
280 -7727.98542032
290 -7724.62612645
300 -7721.49542843
310 -7718.57034191
320 -7715.8307139
330 -7713.25884752
340 -7710.8391778
350 -7708.55799113
360 -7706.40318226
370 -7704.36404373
380 -7702.43108337
390 -7700.5958662
400 -7698.85087757
410 -7697.18940469
420 -7695.60543422
430 -7694.09356367
440 -7692.64892493
450 -7691.26711821
460 -7689.94415492
470 -7688.67640838
480 -7687.46057121
490 -7686.29361847
500 -7685.17277572
510 -7684.09549137
520 -7683.05941264
1 exluding 15803 haps but keeping 211
10 -3291.12315419
20 -3286.83882017
30 -3285.20044355
40 -3284.00828288
working on DrexelMed.A0107.R02.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 40975
10 -20365.8050106
20 -19670.0387879
30 -19272.0644025
40 -18997.3107684
50 -18791.237133
60 -18630.0408974
70 -18500.6592466
80 -18394.9831178
90 -18307.5889633
100 -18234.6145366
110 -18173.15894
120 -18120.9741656
130 -18076.2988371
140 -18037.7507271
150 -18004.2436458
160 -17974.919381
170 -17949.0935532
180 -17926.2145105
190 -17905.8330699
200 -17887.5803992
210 -17871.1516398
220 -17856.2934941
230 -17842.7945814
240 -17830.4777953
250 -17819.1941674
260 -17808.8179049
270 -17799.2423598
280 -17790.3767441
290 -17782.1434421
300 -17774.4758035
310 -17767.3163265
320 -17760.6151627
330 -17754.3288873
340 -17748.419491
350 -17742.8535532
360 -17737.6015635
370 -17732.6373612
380 -17727.93767
390 -17723.4817076
400 -17719.2508565
410 -17715.2283846
420 -17711.3992065
430 -17707.7496793
440 -17704.2674275
450 -17700.9411933
460 -17697.7607075
470 -17694.7165788
480 -17691.8001988
490 -17689.0036596
500 -17686.3196827
510 -17683.7415572
520 -17681.2630861
530 -17678.8785382
540 -17676.5826069
550 -17674.3703726
560 -17672.2372698
570 -17670.1790566
580 -17668.1917887
590 -17666.2717942
600 -17664.4156523
610 -17662.6201722
620 -17660.8823757
630 -17659.1994796
640 -17657.5688811
650 -17655.988143
660 -17654.4549815
670 -17652.9672542
680 -17651.522949
690 -17650.1201751
700 -17648.757153
710 -17647.4322069
720 -17646.1437571
730 -17644.8903128
740 -17643.6704661
750 -17642.4828857
760 -17641.3263122
770 -17640.1995531
780 -17639.1014778
790 -17638.0310141
800 -17636.9871442
810 -17635.9689013
1 exluding 40892 haps but keeping 83
10 -4958.09541296
20 -4926.32417849
30 -4913.9611175
40 -4907.30311561
50 -4903.34936431
60 -4900.74337551
70 -4898.82789223
80 -4897.29463005
90 -4895.99808834
100 -4894.86614187

In [6]:
# Re-import HapReconTools so edits made to the module since it was first
# imported take effect, then rerun the pipeline into nhap_dict.
reload(HapReconTools)
nhap_dict = {}

for name in files:
    print('working on %s' % name)
    fname = base_path % name

    print('getting reads')
    # Materialise the reads, then order them by element 1 of each record.
    reads = list(HapReconTools.read_from_bam(fname))
    reads.sort(key=itemgetter(1))
    ir_reads = list(HapReconTools.yield_IR_reads(reads))

    print('generating graph')
    graph = HapReconTools.generate_hap_graph(ir_reads)

    print('generating haplotypes')
    paths = HapReconTools.simple_paths(graph)
    out_haps = HapReconTools.assemble_haps(paths, graph)

    print('estimating frequencies')
    # NOTE(review): the third positional argument (0.01) is presumably a
    # frequency cutoff -- confirm against HapReconTools.estimate_freqs.
    nhap_dict[name] = HapReconTools.estimate_freqs(
        reads, out_haps, 0.01, tol=0.1)


working on DrexelMed.A0017.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 40526 haps but keeping 49
10 -1600.14809553
20 -1583.43712937
30 -1574.23305682
40 -1568.21563569
50 -1564.2257528
60 -1561.65070646
70 -1560.03477829
1 exluding 40551 haps but keeping 24
10 -1298.41448777
20 -1292.276772
30 -1290.53086605
working on DrexelMed.A0017.R02.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 7930 haps but keeping 90
10 -1541.37672848
20 -1530.68504436
30 -1526.1487316
40 -1523.08819454
50 -1520.84611792
60 -1519.18496327
70 -1517.95918493
1 exluding 7992 haps but keeping 28
10 -1044.53411963
20 -1041.2690665
30 -1039.77539096
40 -1038.4990634
working on DrexelMed.A0107.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 15923 haps but keeping 91
10 -1896.32443135
20 -1888.0878799
30 -1886.38631997
40 -1885.12089302
50 -1883.91143746
60 -1882.73780888
70 -1881.61018703
80 -1880.53878498
1 exluding 15983 haps but keeping 31
10 -1301.07065032
20 -1295.00266667
30 -1293.47182022
working on DrexelMed.A0107.R02.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 40944 haps but keeping 31
10 -2186.46592275
20 -2078.56448612
30 -2029.62995061
40 -2010.22690918
50 -2002.1366607
60 -1998.37357937
70 -1996.40968468
1 exluding 40953 haps but keeping 22
10 -2036.22473971
20 -1971.0374636
30 -1943.60940165
40 -1934.42204496
50 -1931.60157866

In [55]:
# Rerun the pipeline into thap_dict, calling estimate_freqs with a third
# positional argument of 0 and quick_pass_reps=0.
thap_dict = {}

for name in files:
    print('working on %s' % name)
    fname = base_path % name

    print('getting reads')
    # Materialise the reads, then order them by element 1 of each record.
    reads = list(HapReconTools.read_from_bam(fname))
    reads.sort(key=itemgetter(1))
    ir_reads = list(HapReconTools.yield_IR_reads(reads))

    print('generating graph')
    graph = HapReconTools.generate_hap_graph(ir_reads)

    print('generating haplotypes')
    paths = HapReconTools.simple_paths(graph)
    out_haps = HapReconTools.assemble_haps(paths, graph)

    print('estimating frequencies')
    # NOTE(review): the meaning of the 0 / quick_pass_reps=0 arguments is
    # defined in HapReconTools.estimate_freqs -- confirm there.
    thap_dict[name] = HapReconTools.estimate_freqs(
        reads, out_haps, 0, quick_pass_reps=0, tol=0.1)


working on DrexelMed.A0017.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 40575
10 -14171.7300964
20 -13690.5863151
30 -13426.3263756
40 -13248.3149243
50 -13116.8359629
60 -13014.932575
70 -12933.638408
80 -12867.5133033
90 -12812.91961
100 -12767.2737122
110 -12728.6782501
120 -12695.7116275
130 -12667.2920173
140 -12642.5843388
150 -12620.9355775
160 -12601.8294451
170 -12584.8540677
180 -12569.6783552
190 -12556.0342507
200 -12543.7031319
210 -12532.5052905
220 -12522.2917792
230 -12512.9381076
240 -12504.3393823
250 -12496.4065617
260 -12489.0635605
270 -12482.2449922
280 -12475.8943862
290 -12469.9627603
300 -12464.40746
310 -12459.1912016
320 -12454.2812767
330 -12449.6488864
340 -12445.2685812
350 -12441.1177896
360 -12437.1764211
370 -12433.426532
380 -12429.8520427
390 -12426.4385003
400 -12423.1728772
410 -12420.0434006
420 -12417.0394073
430 -12414.1512202
440 -12411.3700416
450 -12408.6878619
460 -12406.0973797
470 -12403.5919329
480 -12401.1654373
490 -12398.8123333
500 -12396.5275379
510 -12394.3064026
520 -12392.1446748
530 -12390.0384638
540 -12387.9842096
550 -12385.9786551
560 -12384.0188199
570 -12382.1019775
580 -12380.2256334
590 -12378.3875057
600 -12376.5855073
610 -12374.8177288
620 -12373.0824237
630 -12371.3779941
640 -12369.7029779
650 -12368.0560366
660 -12366.4359446
670 -12364.8415788
680 -12363.2719094
690 -12361.7259911
700 -12360.2029553
710 -12358.7020029
720 -12357.2223972
730 -12355.763458
740 -12354.3245559
750 -12352.905107
760 -12351.5045685
770 -12350.1224339
780 -12348.7582295
790 -12347.4115106
800 -12346.0818586
810 -12344.7688781
820 -12343.4721944
830 -12342.1914513
840 -12340.9263097
850 -12339.6764456
860 -12338.4415491
870 -12337.2213235
880 -12336.0154843
890 -12334.823759
900 -12333.6458864
910 -12332.4816166
920 -12331.3307106
930 -12330.1929404
940 -12329.0680889
950 -12327.9559497
960 -12326.856327
970 -12325.7690355
980 -12324.6939002
990 -12323.6307559
1000 -12322.579447
1010 -12321.5398271
1020 -12320.5117583
1030 -12319.4951109
1040 -12318.4897627
working on DrexelMed.A0017.R02.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 8020
10 -5958.07264756
20 -5779.26181096
30 -5678.71747952
40 -5613.00718271
50 -5566.38122946
60 -5531.26238756
70 -5503.56480834
80 -5480.90762176
90 -5461.82292755
100 -5445.36975915
110 -5430.92780608
120 -5418.07778075
130 -5406.52817189
140 -5396.06921588
150 -5386.54380303
160 -5377.82920306
170 -5369.82571594
180 -5362.4497004
190 -5355.62931985
200 -5349.30194278
210 -5343.41252896
220 -5337.91258598
230 -5332.75944134
240 -5327.9156773
250 -5323.34864165
260 -5319.02998995
270 -5314.93524101
280 -5311.04334254
290 -5307.33625113
300 -5303.79853345
310 -5300.41699521
320 -5297.18034248
330 -5294.07887833
340 -5291.10423556
350 -5288.2491446
360 -5285.50723486
370 -5282.87286682
380 -5280.34099179
390 -5277.90703634
400 -5275.56680812
410 -5273.31642022
420 -5271.15223126
430 -5269.07079874
440 -5267.06884341
450 -5265.14322265
460 -5263.29091115
470 -5261.50898734
480 -5259.79462438
490 -5258.14508443
500 -5256.55771552
510 -5255.02995005
520 -5253.55930446
530 -5252.14337955
540 -5250.779861
550 -5249.46651997
560 -5248.20121344
570 -5246.98188424
580 -5245.80656067
590 -5244.67335569
600 -5243.58046567
610 -5242.52616868
620 -5241.50882245
working on DrexelMed.A0107.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 16014
10 -8560.35748645
20 -8331.12775169
30 -8203.4886651
40 -8118.70049142
50 -8057.42635232
60 -8010.40791974
70 -7972.63160377
80 -7941.24465274
90 -7914.55544106
100 -7891.50878621
110 -7871.40401199
120 -7853.74337898
130 -7838.15076891
140 -7824.32776319
150 -7812.02943765
160 -7801.05050709
170 -7791.2168749
180 -7782.37999312
190 -7774.41270947
200 -7767.20596233
210 -7760.66603317
220 -7754.71222617
230 -7749.27490668
240 -7744.2938494
250 -7739.71685231
260 -7735.49857467
270 -7731.59956133
280 -7727.98542032
290 -7724.62612645
300 -7721.49542843
310 -7718.57034191
320 -7715.8307139
330 -7713.25884752
340 -7710.8391778
350 -7708.55799113
360 -7706.40318226
370 -7704.36404373
380 -7702.43108337
390 -7700.5958662
400 -7698.85087757
410 -7697.18940469
420 -7695.60543422
430 -7694.09356367
440 -7692.64892493
450 -7691.26711821
460 -7689.94415492
470 -7688.67640838
480 -7687.46057121
490 -7686.29361847
500 -7685.17277572
510 -7684.09549137
520 -7683.05941264
working on DrexelMed.A0107.R02.lastz
getting reads
generating graph
generating haplotypes
estimating frequencies
0 exluding 0 haps but keeping 40975
10 -20365.8050106
20 -19670.0387879
30 -19272.0644025
40 -18997.3107684
50 -18791.237133
60 -18630.0408974
70 -18500.6592466
80 -18394.9831178
90 -18307.5889633
100 -18234.6145366
110 -18173.15894
120 -18120.9741656
130 -18076.2988371
140 -18037.7507271
150 -18004.2436458
160 -17974.919381
170 -17949.0935532
180 -17926.2145105
190 -17905.8330699
200 -17887.5803992
210 -17871.1516398
220 -17856.2934941
230 -17842.7945814
240 -17830.4777953
250 -17819.1941674
260 -17808.8179049
270 -17799.2423598
280 -17790.3767441
290 -17782.1434421
300 -17774.4758035
310 -17767.3163265
320 -17760.6151627
330 -17754.3288873
340 -17748.419491
350 -17742.8535532
360 -17737.6015635
370 -17732.6373612
380 -17727.93767
390 -17723.4817076
400 -17719.2508565
410 -17715.2283846
420 -17711.3992065
430 -17707.7496793
440 -17704.2674275
450 -17700.9411933
460 -17697.7607075
470 -17694.7165788
480 -17691.8001988
490 -17689.0036596
500 -17686.3196827
510 -17683.7415572
520 -17681.2630861
530 -17678.8785382
540 -17676.5826069
550 -17674.3703726
560 -17672.2372698
570 -17670.1790566
580 -17668.1917887
590 -17666.2717942
600 -17664.4156523
610 -17662.6201722
620 -17660.8823757
630 -17659.1994796
640 -17657.5688811
650 -17655.988143
660 -17654.4549815
670 -17652.9672542
680 -17651.522949
690 -17650.1201751
700 -17648.757153
710 -17647.4322069
720 -17646.1437571
730 -17644.8903128
740 -17643.6704661
750 -17642.4828857
760 -17641.3263122
770 -17640.1995531
780 -17639.1014778
790 -17638.0310141
800 -17636.9871442
810 -17635.9689013

In [24]:
from tempfile import NamedTemporaryFile as NTF
from subprocess import check_output, check_call
import shlex
import os
from concurrent.futures import ProcessPoolExecutor
import csv
from StringIO import StringIO
from itertools import islice
from functools import partial

def check_seqs(db_path, seqs):
    """BLAST a batch of sequences against an existing nucleotide database.

    The sequences are written to a temporary FASTA file, ``blastn`` is run
    on it with CSV output (``-outfmt 10``) and ``-max_target_seqs 1``, and
    the temporary query file is removed afterwards (also on failure).

    Parameters
    ----------
    db_path : str
        Path of the BLAST database to search.
    seqs : iterable
        Records passed unchanged to ``fasta_writer``.
        NOTE(review): presumably (name, sequence) pairs -- confirm against
        GeneralSeqTools.fasta_writer.

    Returns
    -------
    list of dict
        One dict per CSV row of blastn output, keyed by 'SeqA' (qseqid),
        'SeqB' (sseqid), 'pident', 'nident' and 'length'. All values are
        the raw strings from the CSV.

    Raises
    ------
    subprocess.CalledProcessError
        If ``blastn`` exits with a non-zero status.
    """
    fields = ['SeqA', 'SeqB', 'pident', 'nident', 'length']
    dpath =  '/home/will/tmpstuf/haptest/tmpseqs/'

    # delete=False so the file survives close() and blastn can open it by
    # name; it is removed explicitly in the finally clause below.
    check_handle = NTF(suffix='.fa', dir=dpath, delete=False)
    try:
        with check_handle:
            fasta_writer(check_handle, seqs)
            check_handle.flush()
            os.fsync(check_handle.fileno())

        # Build argv directly rather than interpolating into a string and
        # shlex-splitting it, so paths with spaces or quotes stay intact.
        cmd_list = [
            'blastn',
            '-db', db_path,
            '-query', check_handle.name,
            '-outfmt', '10 qseqid sseqid pident nident length',
            '-num_threads', '20',
            '-max_target_seqs', '1',
        ]
        out = check_output(cmd_list)
    finally:
        # Previously one query file leaked per call.
        if os.path.exists(check_handle.name):
            os.remove(check_handle.name)

    reader = csv.DictReader(StringIO(out), fieldnames=fields)
    return list(reader)
    

def yield_blocks(iterable, block_size):
    """Yield consecutive lists of up to ``block_size`` items from ``iterable``.

    Parameters
    ----------
    iterable : iterable
        Any iterable; both one-shot iterators and re-iterable containers
        (lists, tuples, ...) are accepted.
    block_size : int
        Maximum number of items per yielded block.

    Yields
    ------
    list
        Non-empty lists in input order; only the final block may be shorter
        than ``block_size``. Nothing is yielded for an empty input.
    """
    # Take a single iterator up front: islice() on a list would restart from
    # the first element on every call and the loop would never terminate.
    iterator = iter(iterable)

    block = list(islice(iterator, block_size))
    while block:
        yield block
        block = list(islice(iterator, block_size))
    

def blast_all_v_all(seqsA, seqsB, block_size=50, max_blocks=20):
    """BLAST the sequences in ``seqsB`` against a database built from ``seqsA``.

    ``seqsA`` is written to a temporary FASTA file and indexed with
    ``makeblastdb``. ``seqsB`` is then split into blocks which are searched
    in parallel (5 worker processes) via ``check_seqs``. The temporary
    database files are removed before returning.

    Parameters
    ----------
    seqsA : iterable
        Records written with ``fasta_writer`` to form the BLAST database.
    seqsB : iterable
        Records used as queries.
    block_size : int, optional
        Number of query records per ``check_seqs`` call. The value was
        previously ignored in favour of a hard-coded 50; the default is 50
        so calls that omit it behave exactly as before.
    max_blocks : int or None, optional
        Upper bound on the number of blocks searched (default 20, matching
        the previously hard-coded cap). Query records beyond
        ``block_size * max_blocks`` are NOT searched; pass ``None`` to
        search everything.

    Returns
    -------
    list of dict
        Concatenation, in block order, of the rows returned by
        ``check_seqs`` for each block.

    Raises
    ------
    subprocess.CalledProcessError
        If ``makeblastdb`` (or ``blastn`` inside a worker) fails.
    """
    import glob  # local import: only needed for the cleanup below

    dpath = '/home/will/tmpstuf/haptest/tmpseqs/'

    # delete=False so the FASTA survives close() and the external tools can
    # open it by name; everything is removed in the finally clause.
    db_handle = NTF(suffix='.fa', dir=dpath, delete=False)
    try:
        with db_handle:
            fasta_writer(db_handle, seqsA)
            db_handle.flush()
            os.fsync(db_handle.fileno())

        check_call(['makeblastdb', '-in', db_handle.name, '-dbtype', 'nucl'])

        align_func = partial(check_seqs, db_handle.name)
        check_iterable = islice(yield_blocks(iter(seqsB), block_size),
                                max_blocks)
        blocks = []
        with ProcessPoolExecutor(max_workers=5) as pool:
            for block in pool.map(align_func, check_iterable):
                blocks.extend(block)
        return blocks
    finally:
        # Remove the FASTA and every index file makeblastdb derived from it
        # (they share the FASTA path as a prefix).
        for leftover in glob.glob(db_handle.name + '*'):
            os.remove(leftover)

In [52]:
def add_names(seqs, base):
    """Pair each sequence with a name made of ``base`` plus a 3-digit index.

    ``seqs`` is an iterable of ``((seq, pos), value)`` items; only ``seq`` is
    kept. Returns a list of ``(name, seq)`` tuples in input order, where the
    name is ``base`` followed by the zero-padded position in ``seqs``.
    """
    template = base + '%03i'
    named = []
    for index, ((sequence, _pos), _value) in enumerate(seqs):
        named.append((template % index, sequence))
    return named

# For each pair of visits, BLAST the second visit's haplotypes (queries)
# against the first visit's haplotypes (database) and record, per hit, the
# frequency of the matched database haplotype and the fraction of identical
# positions in the alignment.
paired_visits = [
    ('DrexelMed.A0107.lastz', 'DrexelMed.A0107.R02.lastz'),
    ('DrexelMed.A0017.lastz', 'DrexelMed.A0017.R02.lastz'),
]
scatter_data = []
for vA, vB in paired_visits:
    vA_names = add_names(hap_dict[vA], 'R00-')
    vB_names = add_names(hap_dict[vB], 'R02-')
    patient = vA.split('.')[1]

    scores = blast_all_v_all(vA_names, vB_names)
    for row in scores:
        # check_seqs labels qseqid as 'SeqA' and sseqid as 'SeqB', so 'SeqB'
        # is the database (vA) sequence and 'SeqA' the query (vB) sequence.
        sA_pos = int(row['SeqB'].rsplit('-', 1)[-1])
        sB_pos = int(row['SeqA'].rsplit('-', 1)[-1])
        freqA = hap_dict[vA][sA_pos][-1]
        n_identical = float(row['nident'])
        aln_length = float(row['length'])
        cons = n_identical / aln_length
        scatter_data.append((freqA, cons, patient))

In [53]:
import pandas as pd

# Plot colour per patient id.
cdict = {'A0107': 'g', 'A0017': 'r'}

# One row per entry of scatter_data, plus the plot colour for its patient.
tdata = pd.DataFrame(scatter_data, columns=['SourceFreq', 'Cons', 'Pat'])
tdata['Color'] = tdata['Pat'].map(cdict.__getitem__)

In [54]:
# Scatter of haplotype frequency (log x-axis) against alignment identity,
# coloured per patient via the 'Color' column of tdata.
plt.figure(figsize = (10, 10))
ax = plt.subplot(111)
ax.scatter(tdata['SourceFreq'], tdata['Cons'], c = list(tdata['Color']), alpha=0.5)
# Label the axes so the figure can be read without the surrounding code.
ax.set_xlabel('SourceFreq (estimated frequency of matched haplotype)')
ax.set_ylabel('Cons (nident / length)')
# Kept last: set_xscale returns None, so the cell shows no stray repr.
ax.set_xscale('log')



In [39]:
# Display the 'Pat' column of tdata as a quick sanity check of its labels.
tdata['Pat']


Out[39]:
0     A0107
1     A0107
2     A0107
3     A0107
4     A0107
5     A0107
6     A0107
7     A0107
8     A0107
9     A0107
10    A0107
11    A0107
12    A0107
13    A0107
14    A0107
...
325    A0017
326    A0017
327    A0017
328    A0017
329    A0017
330    A0017
331    A0017
332    A0017
333    A0017
334    A0017
335    A0017
336    A0017
337    A0017
338    A0017
339    A0017
Name: Pat, Length: 340, dtype: object

In [ ]: