Plotting the mutations density in the genes

Are there specific genes in which a significant portion of the mutations fall? We want to answer this by finding the distribution of the number of mutations per gene. That is, for each integer, we want to know how many genes have that number of mutations.


In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import optimize

sns.set()

We first map genes to the number of mutations they harbor (read from a random sample of 100,000 mutations)


In [2]:
from collections import Counter
from ICGC_data_parser import SSM_Reader



mutations_per_gene = Counter()

mutations = SSM_Reader(filename='/home/ad115/Downloads/simple_somatic_mutation.aggregated.vcf.gz')

# Fix weird bug due to malformed description headers
mutations.infos['studies'] = mutations.infos['studies']._replace(type='String')

consequences = mutations.subfield_parser('CONSEQUENCE')



for i, record in enumerate(mutations):
    if i % 100000 == 0:
        print(i)
    affected_genes = [c.gene_symbol for c in consequences(record) if c.gene_affected]
    mutations_per_gene.update(affected_genes)
    
mutations_per_gene.most_common(5)


0
100000
200000
300000
400000
500000
600000
700000
800000
900000
1000000
1100000
1200000
1300000
1400000
1500000
1600000
1700000
1800000
1900000
2000000
2100000
2200000
2300000
2400000
2500000
2600000
2700000
2800000
2900000
3000000
3100000
3200000
3300000
3400000
3500000
3600000
3700000
3800000
3900000
4000000
4100000
4200000
4300000
4400000
4500000
4600000
4700000
4800000
4900000
5000000
5100000
5200000
5300000
5400000
5500000
5600000
5700000
5800000
5900000
6000000
6100000
6200000
6300000
6400000
6500000
6600000
6700000
6800000
6900000
7000000
7100000
7200000
7300000
7400000
7500000
7600000
7700000
7800000
7900000
8000000
8100000
8200000
8300000
8400000
8500000
8600000
8700000
8800000
8900000
9000000
9100000
9200000
9300000
9400000
9500000
9600000
9700000
9800000
9900000
10000000
10100000
10200000
10300000
10400000
10500000
10600000
10700000
10800000
10900000
11000000
11100000
11200000
11300000
11400000
11500000
11600000
11700000
11800000
11900000
12000000
12100000
12200000
12300000
12400000
12500000
12600000
12700000
12800000
12900000
13000000
13100000
13200000
13300000
13400000
13500000
13600000
13700000
13800000
13900000
14000000
14100000
14200000
14300000
14400000
14500000
14600000
14700000
14800000
14900000
15000000
15100000
15200000
15300000
15400000
15500000
15600000
15700000
15800000
15900000
16000000
16100000
16200000
16300000
16400000
16500000
16600000
16700000
16800000
16900000
17000000
17100000
17200000
17300000
17400000
17500000
17600000
17700000
17800000
17900000
18000000
18100000
18200000
18300000
18400000
18500000
18600000
18700000
18800000
18900000
19000000
19100000
19200000
19300000
19400000
19500000
19600000
19700000
19800000
19900000
20000000
20100000
20200000
20300000
20400000
20500000
20600000
20700000
20800000
20900000
21000000
21100000
21200000
21300000
21400000
21500000
21600000
21700000
21800000
21900000
22000000
22100000
22200000
22300000
22400000
22500000
22600000
22700000
22800000
22900000
23000000
23100000
23200000
23300000
23400000
23500000
23600000
23700000
23800000
23900000
24000000
24100000
24200000
24300000
24400000
24500000
24600000
24700000
24800000
24900000
25000000
25100000
25200000
25300000
25400000
25500000
25600000
25700000
25800000
25900000
26000000
26100000
26200000
26300000
26400000
26500000
26600000
26700000
26800000
26900000
27000000
27100000
27200000
27300000
27400000
27500000
27600000
27700000
27800000
27900000
28000000
28100000
28200000
28300000
28400000
28500000
28600000
28700000
28800000
28900000
29000000
29100000
29200000
29300000
29400000
29500000
29600000
29700000
29800000
29900000
30000000
30100000
30200000
30300000
30400000
30500000
30600000
30700000
30800000
30900000
31000000
31100000
31200000
31300000
31400000
31500000
31600000
31700000
31800000
31900000
32000000
32100000
32200000
32300000
32400000
32500000
32600000
32700000
32800000
32900000
33000000
33100000
33200000
33300000
33400000
33500000
33600000
33700000
33800000
33900000
34000000
34100000
34200000
34300000
34400000
34500000
34600000
34700000
34800000
34900000
35000000
35100000
35200000
35300000
35400000
35500000
35600000
35700000
35800000
35900000
36000000
36100000
36200000
36300000
36400000
36500000
36600000
36700000
36800000
36900000
37000000
37100000
37200000
37300000
37400000
37500000
37600000
37700000
37800000
37900000
38000000
38100000
38200000
38300000
38400000
38500000
38600000
38700000
38800000
38900000
39000000
39100000
39200000
39300000
39400000
39500000
39600000
39700000
39800000
39900000
40000000
40100000
40200000
40300000
40400000
40500000
40600000
40700000
40800000
40900000
41000000
41100000
41200000
41300000
41400000
41500000
41600000
41700000
41800000
41900000
42000000
42100000
42200000
42300000
42400000
42500000
42600000
42700000
42800000
42900000
43000000
43100000
43200000
43300000
43400000
43500000
43600000
43700000
43800000
43900000
44000000
44100000
44200000
44300000
44400000
44500000
44600000
44700000
44800000
44900000
45000000
45100000
45200000
45300000
45400000
45500000
45600000
45700000
45800000
45900000
46000000
46100000
46200000
46300000
46400000
46500000
46600000
46700000
46800000
46900000
47000000
47100000
47200000
47300000
47400000
47500000
47600000
47700000
47800000
47900000
48000000
48100000
48200000
48300000
48400000
48500000
48600000
48700000
48800000
48900000
49000000
49100000
49200000
49300000
49400000
49500000
49600000
49700000
49800000
49900000
50000000
50100000
50200000
50300000
50400000
50500000
50600000
50700000
50800000
50900000
51000000
51100000
51200000
51300000
51400000
51500000
51600000
51700000
51800000
51900000
52000000
52100000
52200000
52300000
52400000
52500000
52600000
52700000
52800000
52900000
53000000
53100000
53200000
53300000
53400000
53500000
53600000
53700000
53800000
53900000
54000000
54100000
54200000
54300000
54400000
54500000
54600000
54700000
54800000
54900000
55000000
55100000
55200000
55300000
55400000
55500000
55600000
55700000
55800000
55900000
56000000
56100000
56200000
56300000
56400000
56500000
56600000
56700000
56800000
56900000
57000000
57100000
57200000
57300000
57400000
57500000
57600000
57700000
57800000
57900000
58000000
58100000
58200000
58300000
58400000
58500000
58600000
58700000
58800000
58900000
59000000
59100000
59200000
59300000
59400000
59500000
59600000
59700000
59800000
59900000
60000000
60100000
60200000
60300000
60400000
60500000
60600000
60700000
60800000
60900000
61000000
61100000
61200000
61300000
61400000
61500000
61600000
61700000
61800000
61900000
62000000
62100000
62200000
62300000
62400000
62500000
62600000
62700000
62800000
62900000
63000000
63100000
63200000
63300000
63400000
63500000
63600000
63700000
63800000
63900000
64000000
64100000
64200000
64300000
64400000
64500000
64600000
64700000
64800000
64900000
65000000
65100000
65200000
65300000
65400000
65500000
65600000
65700000
65800000
65900000
66000000
66100000
66200000
66300000
66400000
66500000
66600000
66700000
66800000
66900000
67000000
67100000
67200000
67300000
67400000
67500000
67600000
67700000
67800000
67900000
68000000
68100000
68200000
68300000
68400000
68500000
68600000
68700000
68800000
68900000
69000000
69100000
69200000
69300000
69400000
69500000
69600000
69700000
69800000
69900000
70000000
70100000
70200000
70300000
70400000
70500000
70600000
70700000
70800000
70900000
71000000
71100000
71200000
71300000
71400000
71500000
71600000
71700000
71800000
71900000
72000000
72100000
72200000
72300000
72400000
72500000
72600000
72700000
72800000
72900000
73000000
73100000
73200000
73300000
73400000
73500000
73600000
73700000
73800000
73900000
74000000
74100000
74200000
74300000
74400000
74500000
74600000
74700000
74800000
74900000
75000000
75100000
75200000
75300000
75400000
75500000
75600000
75700000
75800000
75900000
76000000
76100000
76200000
76300000
76400000
76500000
76600000
76700000
76800000
76900000
77000000
77100000
77200000
77300000
77400000
Out[2]:
[('PCDH15', 1014985),
 ('RBFOX1', 817112),
 ('CSMD1', 743708),
 ('DLG2', 592735),
 ('SPOCK3', 525639)]

In [3]:
len(mutations_per_gene)


Out[3]:
55754

Now we want to group by number of mutations


In [4]:
distribution = Counter(mutations_per_gene.values())
distribution.most_common(10)


Out[4]:
[(188, 145),
 (190, 143),
 (213, 140),
 (179, 139),
 (177, 139),
 (198, 138),
 (206, 137),
 (199, 134),
 (187, 134),
 (192, 134)]

Now we plot the data...


In [5]:
x = sorted(distribution.keys())
y = [distribution[i] for i in x]

plt.figure(figsize=(10, 7))

plt.plot(x, y)
plt.yscale('log')
plt.xscale('log')
plt.title('Mutation distribution by gene')
plt.xlabel('$n$')
plt.ylabel('genes with $n$ mutations')
plt.show()


We can see the data resembles a power law but does not quite fit. It looks like it has a bump in the middle, this may be because the genes have wildly varying lengths. In order to correct this we have to normalize the mutations per gene by the length of the gene. This is done as follows:


In [6]:
# In order to find out the length of the 
# genes, we will use the Ensembl REST API.
import ensembl_rest
from itertools import islice

def chunks_of(iterable, size=10):
    """A generator that yields chunks of fixed size from the iterable."""
    iterator = iter(iterable)
    while True:
        next_ = list(islice(iterator, size))
        if next_:
            yield next_
        else:
            break
# ---
        
# Instantiate a client for communication with
# the Ensembl REST API.
client = ensembl_rest.EnsemblClient()


normalized_counts = dict()
lengths_distribution = Counter()
for i, gene_batch in enumerate(chunks_of(mutations_per_gene, size=1000)):
    # Get information of the genes
    gene_data = client.symbol_post('human',
                                   params={'symbols': gene_batch})
    gene_lengths = {gene: data['end'] - data['start'] + 1
                        for gene, data in gene_data.items()}
    lengths_distribution.update(gene_lengths.values())
    
    # Get the normalization
    normalized_counts.update({
        gene: mutations_per_gene[gene] / gene_lengths[gene]
            for gene in gene_data
    })
    
    print((i+1)*1000)


1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000
44000
45000
46000
47000
48000
49000
50000
51000
52000
53000
54000
55000
56000

In [7]:
c = Counter()
c.update(normalized_counts)
c.most_common(10)


Out[7]:
[('IGHD7-27', 639.0),
 ('MIR663A', 456.5483870967742),
 ('IGKJ4', 192.54054054054055),
 ('IGKJ1', 190.89473684210526),
 ('IGKJ3', 189.60526315789474),
 ('IGKJ2', 185.25641025641025),
 ('IGHJ4', 179.93478260869566),
 ('IGKJ5', 178.31578947368422),
 ('IGHJ3P', 174.84),
 ('IGHJ5', 174.0)]

In [8]:
normalized_distribution = Counter(normalized_counts.values())
normalized_distribution.most_common(10)


Out[8]:
[(2.0, 18),
 (2.5, 12),
 (3.0, 11),
 (0.5, 11),
 (0.25, 9),
 (0.3333333333333333, 9),
 (0.6666666666666666, 9),
 (1.5, 8),
 (1.4299065420560748, 7),
 (1.0, 7)]

In [9]:
x = sorted(normalized_distribution.keys())
y = [normalized_distribution[i] for i in x]

plt.figure(figsize=(10, 7))

plt.plot(x, y)
plt.xscale('log')
plt.title('Mutations per base distribution by gene (normalized)')
plt.xlabel('$x$')
plt.ylabel('genes with $x$ mutations per base pair')
plt.show()



In [10]:
max(lengths_distribution)


Out[10]:
2473537

In [11]:
min(lengths_distribution)


Out[11]:
8

In [12]:
lengths_distribution.most_common(5)


Out[12]:
[(107, 530), (104, 185), (106, 112), (62, 94), (105, 87)]

In [13]:
x = sorted(lengths_distribution.keys())
y = [lengths_distribution[i] for i in x]

plt.figure(figsize=(10, 7))

plt.plot(x, y)
plt.xscale('log')
plt.yscale('log')
plt.title('Gene lengths distribution')
plt.xlabel('$L$')
plt.ylabel('genes with length $L$')

plt.savefig('gene-lengths.png')
plt.show()



In [14]:
lengths_distribution


Out[14]:
Counter({2963: 2,
         1308: 4,
         1020: 8,
         34493: 1,
         86: 44,
         36466: 1,
         1596: 2,
         660: 5,
         282: 20,
         943: 7,
         11207: 1,
         8176: 1,
         146: 6,
         57459: 1,
         104: 185,
         20910: 1,
         174474: 1,
         978: 3,
         231363: 1,
         59196: 1,
         979: 7,
         164: 37,
         27606: 1,
         16895: 1,
         9326: 1,
         6718: 1,
         258991: 1,
         61: 27,
         16005: 1,
         118384: 1,
         6299: 1,
         381: 10,
         69711: 1,
         5101: 1,
         580: 14,
         416: 5,
         555: 6,
         399: 3,
         8285: 1,
         3325: 2,
         848: 7,
         52319: 1,
         93357: 1,
         524: 6,
         9265: 1,
         35919: 1,
         7405: 5,
         17740: 1,
         156: 12,
         87575: 1,
         5484: 1,
         95: 45,
         52447: 2,
         4087: 1,
         1122: 5,
         42074: 3,
         2541: 4,
         994: 8,
         47151: 1,
         23486: 1,
         534: 7,
         57949: 1,
         391: 11,
         293: 32,
         97308: 1,
         1098: 2,
         15167: 1,
         66146: 1,
         33950: 2,
         21226: 1,
         7920: 3,
         275: 12,
         298: 70,
         175278: 1,
         1314: 1,
         1656: 4,
         9089: 3,
         108: 74,
         6155: 1,
         2184: 4,
         37049: 1,
         19818: 2,
         152: 7,
         5280: 2,
         64629: 1,
         13517: 2,
         228716: 1,
         21198: 1,
         5080: 2,
         54965: 1,
         129: 21,
         124882: 1,
         78: 43,
         150: 5,
         107: 530,
         17643: 1,
         6329: 2,
         17485: 2,
         15124: 2,
         3342: 2,
         20741: 1,
         96017: 1,
         3111: 1,
         49393: 1,
         12645: 2,
         337: 8,
         15538: 2,
         1468: 3,
         158471: 1,
         7109: 1,
         29480: 1,
         33197: 1,
         19307: 1,
         111638: 1,
         90: 30,
         4631: 1,
         118: 40,
         13237: 2,
         7018: 1,
         42670: 2,
         80106: 1,
         39072: 1,
         11315: 2,
         582: 6,
         7010: 2,
         14980: 1,
         3481: 1,
         44603: 1,
         9266: 1,
         20470: 2,
         67064: 1,
         366928: 1,
         58000: 2,
         783: 3,
         208264: 1,
         93557: 1,
         34129: 1,
         2569: 2,
         383: 3,
         20246: 1,
         7166: 1,
         20751: 1,
         4182: 1,
         88384: 1,
         103: 86,
         250: 14,
         38482: 1,
         157: 4,
         20711: 1,
         28762: 1,
         22540: 1,
         73940: 1,
         120: 28,
         3993: 1,
         66: 40,
         7057: 2,
         17894: 1,
         48599: 2,
         61391: 1,
         910: 12,
         84812: 1,
         93103: 1,
         1563: 2,
         8105: 2,
         41764: 1,
         9487: 1,
         21095: 1,
         3873: 3,
         280: 20,
         4247: 2,
         6500: 1,
         1157: 4,
         238: 8,
         495: 8,
         6641: 4,
         18756: 1,
         132: 20,
         126: 38,
         84805: 1,
         83: 45,
         27799: 1,
         295175: 1,
         56742: 1,
         73190: 1,
         26563: 1,
         79: 38,
         15510: 1,
         33334: 1,
         52781: 1,
         357: 5,
         67700: 1,
         31387: 2,
         4332: 1,
         42226: 1,
         318: 22,
         161: 8,
         299: 57,
         3102: 1,
         8960: 2,
         28235: 1,
         10563: 1,
         24252: 3,
         26080: 1,
         2998: 1,
         1302: 3,
         2411: 3,
         33264: 1,
         13464: 2,
         63625: 1,
         160047: 1,
         50645: 1,
         26775: 1,
         135: 30,
         67374: 1,
         148408: 1,
         42331: 1,
         23627: 1,
         549: 3,
         165: 15,
         8946: 3,
         50277: 1,
         140568: 1,
         22884: 1,
         101: 42,
         64911: 1,
         485: 9,
         7063: 3,
         10707: 2,
         14077: 1,
         44429: 1,
         34977: 1,
         9109: 1,
         8966: 1,
         993: 4,
         1510: 4,
         29705: 1,
         2777: 2,
         7107: 3,
         865: 6,
         12426: 2,
         18719: 1,
         3402: 3,
         12777: 1,
         14135: 1,
         5728: 2,
         8022: 2,
         56505: 3,
         402: 5,
         39056: 1,
         134: 17,
         9208: 1,
         8002: 2,
         343452: 1,
         915: 4,
         2629: 3,
         31720: 1,
         10879: 1,
         37594: 1,
         2101: 4,
         12717: 1,
         600: 7,
         13618: 2,
         832: 12,
         162: 13,
         11594: 1,
         2237: 4,
         14351: 1,
         62659: 2,
         240106: 1,
         72865: 1,
         71758: 1,
         525: 9,
         9380: 1,
         155973: 1,
         166: 10,
         3184: 1,
         30084: 1,
         20721: 1,
         2637: 1,
         82824: 1,
         7831: 1,
         43065: 1,
         281: 18,
         90641: 1,
         115879: 1,
         7086: 1,
         2813: 3,
         19304: 1,
         9163: 1,
         19355: 2,
         12580: 1,
         6978: 3,
         564: 3,
         25352: 2,
         820: 7,
         10454: 2,
         43279: 1,
         35762: 1,
         493: 10,
         6203: 1,
         489: 6,
         2627: 1,
         17445: 1,
         411: 15,
         736: 7,
         11528: 1,
         7907: 2,
         80302: 1,
         7541: 2,
         96: 69,
         75: 61,
         66287: 1,
         9103: 2,
         218494: 1,
         36938: 1,
         6838: 2,
         998: 7,
         62916: 1,
         4320: 1,
         25720: 2,
         954: 11,
         1849: 4,
         6058: 2,
         15260: 2,
         9965: 3,
         125: 12,
         51588: 1,
         319: 17,
         13948: 1,
         56272: 1,
         8970: 1,
         56182: 1,
         7857: 3,
         12492: 1,
         1003: 4,
         1766: 3,
         17234: 1,
         624: 4,
         25732: 1,
         5406: 2,
         37297: 1,
         21295: 2,
         22844: 2,
         8881: 1,
         14565: 1,
         9069: 3,
         420: 5,
         785: 4,
         25335: 1,
         6201: 2,
         378: 6,
         9848: 1,
         5441: 1,
         95816: 1,
         9048: 1,
         957: 15,
         40845: 1,
         1657: 3,
         433: 10,
         22529: 1,
         3144: 2,
         3172: 2,
         6694: 1,
         11569: 2,
         190: 10,
         5375: 1,
         180: 5,
         14844: 1,
         34544: 1,
         16663: 1,
         28183: 2,
         1013: 2,
         562: 10,
         7132: 2,
         109456: 1,
         5028: 1,
         814: 8,
         12116: 1,
         158074: 1,
         96702: 1,
         63174: 1,
         16697: 2,
         14227: 2,
         520: 11,
         14422: 1,
         35567: 1,
         85678: 1,
         29685: 2,
         49535: 1,
         5141: 2,
         17348: 2,
         110: 74,
         14083: 1,
         208013: 2,
         167: 5,
         14075: 2,
         3812: 3,
         12909: 1,
         34727: 1,
         214: 7,
         6462: 1,
         508: 5,
         85210: 1,
         136: 14,
         301: 29,
         730: 9,
         232406: 1,
         14147: 1,
         393: 10,
         37560: 1,
         68775: 1,
         15948: 1,
         45476: 1,
         4883: 2,
         148367: 1,
         18815: 1,
         2616: 1,
         10231: 1,
         10740: 2,
         937: 14,
         194457: 1,
         6583: 2,
         5640: 3,
         1318: 4,
         35758: 1,
         1570: 4,
         25618: 1,
         3066: 2,
         1254: 1,
         187: 7,
         24052: 1,
         4486: 1,
         1478: 1,
         559: 8,
         36823: 1,
         9481: 3,
         41530: 1,
         16602: 1,
         123: 8,
         80831: 1,
         67276: 1,
         29549: 1,
         18418: 1,
         2600: 2,
         1386: 5,
         655: 9,
         5232: 4,
         21553: 1,
         753: 3,
         41490: 1,
         6149: 3,
         30165: 1,
         49822: 1,
         968: 8,
         543: 4,
         1225252: 1,
         3121: 1,
         7911: 1,
         1660: 2,
         282016: 1,
         22829: 1,
         17362: 1,
         18768: 1,
         170799: 1,
         11031: 1,
         3631: 1,
         106511: 1,
         8219: 1,
         393985: 1,
         88: 33,
         246: 5,
         62853: 1,
         53126: 1,
         8432: 1,
         5188: 2,
         12772: 2,
         13634: 1,
         27397: 1,
         6817: 1,
         5564: 2,
         28352: 1,
         22537: 2,
         4166: 2,
         45167: 1,
         17164: 2,
         29825: 1,
         9645: 1,
         300: 38,
         1846: 2,
         48678: 1,
         17171: 1,
         68: 36,
         161424: 1,
         390: 5,
         3882: 3,
         113476: 1,
         59052: 1,
         14677: 2,
         4140: 1,
         4721: 4,
         6806: 2,
         2194: 2,
         1959: 3,
         10526: 1,
         78051: 1,
         143: 10,
         8953: 1,
         24579: 1,
         7488: 2,
         6726: 2,
         3856: 2,
         1006: 13,
         8535: 1,
         51717: 1,
         50920: 1,
         40289: 1,
         19977: 1,
         294: 29,
         66814: 1,
         4824: 1,
         92: 36,
         17381: 1,
         65129: 1,
         71900: 1,
         3350: 4,
         76: 44,
         41334: 1,
         18982: 1,
         60302: 1,
         1029: 8,
         6138: 2,
         4068: 1,
         5605: 2,
         158040: 1,
         504: 8,
         354: 15,
         297: 53,
         47200: 1,
         1540: 2,
         637: 6,
         7481: 1,
         11331: 3,
         37102: 1,
         168: 4,
         3881: 2,
         50759: 1,
         1558: 4,
         35468: 1,
         9975: 2,
         44439: 1,
         110531: 1,
         40650: 1,
         4983: 1,
         265: 13,
         33806: 1,
         210671: 1,
         71751: 1,
         17107: 1,
         222: 6,
         67: 32,
         576969: 1,
         66211: 1,
         7247: 1,
         3581: 3,
         18584: 1,
         2438: 1,
         15741: 2,
         12445: 2,
         448: 10,
         40880: 1,
         1119: 4,
         1903: 3,
         1328: 2,
         349: 10,
         11715: 1,
         35136: 1,
         105616: 1,
         607: 10,
         219: 10,
         19955: 1,
         67949: 1,
         1360: 3,
         9775: 4,
         8632: 1,
         72: 58,
         15176: 1,
         35449: 1,
         6211: 1,
         4950: 2,
         162432: 1,
         32696: 1,
         32342: 1,
         44637: 1,
         16312: 1,
         969: 8,
         422: 4,
         91084: 1,
         99: 51,
         5238: 1,
         12637: 1,
         11542: 1,
         27833: 1,
         12098: 1,
         23580: 1,
         18143: 1,
         9261: 2,
         981: 7,
         3847: 2,
         703: 4,
         232707: 1,
         130358: 1,
         15508: 2,
         34386: 1,
         312: 17,
         131: 11,
         6524: 2,
         114: 23,
         21595: 1,
         23025: 1,
         6435: 5,
         31751: 1,
         17284: 1,
         10804: 2,
         9138: 1,
         87086: 1,
         1240: 3,
         1200: 3,
         31705: 1,
         26452: 1,
         4133: 1,
         160143: 1,
         25971: 1,
         652: 5,
         140061: 1,
         1145: 4,
         55810: 1,
         26943: 2,
         85: 49,
         21032: 1,
         930: 22,
         10040: 1,
         7626: 1,
         67343: 1,
         51051: 1,
         304: 28,
         20449: 3,
         77: 45,
         48697: 1,
         25755: 1,
         8264: 2,
         235437: 1,
         5804: 1,
         1906: 2,
         975: 8,
         1023: 6,
         27877: 2,
         73: 42,
         4797: 1,
         9986: 1,
         23829: 1,
         15196: 2,
         29988: 1,
         3759: 1,
         4236: 1,
         1124: 4,
         1139: 4,
         35526: 1,
         60348: 1,
         2763: 2,
         2207: 1,
         58125: 1,
         18494: 1,
         10219: 2,
         1072: 2,
         27026: 3,
         4288: 3,
         109739: 1,
         6274: 2,
         163883: 1,
         502: 15,
         1409: 4,
         39345: 1,
         942: 19,
         11272: 1,
         7543: 1,
         25414: 1,
         30936: 1,
         18956: 1,
         522: 17,
         119: 78,
         233302: 1,
         24091: 2,
         87: 50,
         35408: 1,
         3420: 3,
         16647: 1,
         27050: 2,
         676: 7,
         54128: 2,
         73036: 1,
         7093: 2,
         447: 8,
         305: 22,
         971: 5,
         12142: 1,
         5823: 3,
         46633: 1,
         38580: 1,
         15624: 1,
         1425: 1,
         24324: 1,
         260: 5,
         4619: 3,
         1039: 5,
         99660: 1,
         106: 112,
         2739: 2,
         15356: 2,
         9714: 1,
         25492: 1,
         1779: 4,
         947: 8,
         8028: 3,
         76157: 1,
         89: 30,
         26724: 1,
         17267: 2,
         514: 8,
         18865: 2,
         5126: 2,
         128: 14,
         183101: 1,
         139: 13,
         9284: 1,
         39196: 1,
         8768: 1,
         60220: 1,
         65800: 1,
         31822: 1,
         35121: 1,
         4921: 1,
         26537: 1,
         17491: 1,
         19373: 1,
         987: 7,
         33602: 1,
         1088: 3,
         310: 17,
         1828: 3,
         76010: 1,
         58906: 1,
         98: 45,
         80209: 1,
         51332: 1,
         4881: 1,
         6783: 1,
         19407: 1,
         196695: 1,
         1041: 7,
         462: 14,
         64258: 1,
         12246: 1,
         11912: 1,
         7360: 1,
         33387: 2,
         2301: 5,
         103463: 1,
         87971: 2,
         2892: 1,
         5021: 1,
         130: 18,
         6334: 1,
         25691: 1,
         1116: 3,
         100: 58,
         663: 8,
         941: 6,
         339: 12,
         457: 8,
         21310: 1,
         32965: 1,
         129295: 1,
         142: 10,
         8342: 1,
         24332: 1,
         16898: 2,
         617: 1,
         45938: 1,
         1588: 2,
         52675: 1,
         9774: 2,
         33106: 1,
         21318: 1,
         13597: 1,
         16873: 1,
         18190: 1,
         990: 6,
         42065: 1,
         48144: 1,
         343713: 1,
         4742: 1,
         6727: 4,
         2008: 1,
         15765: 1,
         227540: 1,
         110926: 2,
         32379: 1,
         39144: 1,
         42088: 1,
         40663: 2,
         38980: 1,
         11476: 1,
         300702: 1,
         1390: 5,
         7604: 2,
         10620: 2,
         8637: 3,
         23786: 1,
         76692: 1,
         8436: 1,
         5948: 2,
         6962: 2,
         38461: 1,
         3169: 2,
         1334: 3,
         192074: 1,
         8279: 1,
         55188: 1,
         6940: 2,
         19911: 1,
         1311: 1,
         270: 18,
         568: 10,
         29226: 2,
         23182: 2,
         61084: 1,
         197777: 1,
         4597: 1,
         26023: 1,
         5634: 1,
         17292: 1,
         623: 4,
         51244: 1,
         6659: 1,
         609: 8,
         929: 6,
         50308: 2,
         1085: 3,
         6974: 2,
         3107: 1,
         6452: 1,
         2044: 2,
         23533: 1,
         102631: 1,
         45929: 2,
         3858: 3,
         42959: 1,
         1402: 1,
         8994: 1,
         1927: 2,
         705: 3,
         6983: 1,
         302: 17,
         7044: 1,
         295: 45,
         28093: 1,
         117: 29,
         18566: 1,
         23029: 1,
         204423: 1,
         79485: 1,
         12048: 2,
         35703: 1,
         945: 18,
         79772: 1,
         25171: 1,
         7398: 1,
         5561: 2,
         390007: 1,
         1264: 9,
         29558: 1,
         1447: 7,
         10251: 2,
         9682: 1,
         4806: 1,
         29504: 1,
         12258: 1,
         1282: 4,
         65567: 1,
         51545: 1,
         18511: 2,
         20312: 1,
         41345: 1,
         13602: 2,
         38422: 1,
         12553: 2,
         7765: 3,
         5590: 1,
         6301: 2,
         2732: 1,
         1018: 6,
         72423: 1,
         109544: 1,
         860: 5,
         5891: 2,
         2110: 3,
         37837: 2,
         1076: 4,
         1137: 4,
         14749: 1,
         3514: 2,
         694: 4,
         49197: 1,
         11604: 1,
         4668: 1,
         14528: 1,
         2371: 1,
         60: 38,
         17196: 2,
         32360: 2,
         15262: 1,
         24382: 1,
         122: 14,
         10689: 1,
         12606: 2,
         835899: 1,
         10500: 3,
         70728: 1,
         71762: 1,
         91: 36,
         918: 12,
         15864: 1,
         193: 4,
         5234: 3,
         25802: 1,
         8509: 2,
         911: 10,
         2423: 2,
         1340: 1,
         8667: 1,
         8635: 2,
         30387: 1,
         9537: 2,
         303: 33,
         31400: 1,
         4992: 2,
         1051: 7,
         1280: 1,
         10398: 1,
         423: 6,
         21790: 1,
         92597: 1,
         307672: 1,
         28571: 1,
         72195: 1,
         1270: 3,
         84613: 1,
         17864: 1,
         6731: 1,
         82: 81,
         8979: 1,
         435: 9,
         1288: 5,
         3365: 2,
         45571: 1,
         15681: 2,
         2958: 1,
         1279: 10,
         5782: 1,
         46259: 2,
         105833: 1,
         11185: 2,
         116: 33,
         7314: 3,
         7381: 2,
         27485: 2,
         1854: 1,
         1909: 2,
         961: 9,
         1431: 3,
         10957: 1,
         5466: 1,
         382435: 1,
         107961: 1,
         26138: 1,
         32188: 1,
         172826: 1,
         24819: 3,
         6702: 1,
         277332: 1,
         189016: 1,
         15893: 1,
         156028: 1,
         1172: 1,
         29434: 1,
         6432: 1,
         264: 13,
         186: 6,
         27088: 1,
         934: 10,
         ...})

In [ ]:


In [ ]: