Figure 3A/B/C, crossover recombination


In [1]:
%run ../../shared_setup.ipynb


docker image cggh/biipy:v1.6.0

In [2]:
# load variation data

# Samples to leave out: duplicates plus progeny flagged for excessive
# recombination. N.B., dup_samples.copy() is only a shallow copy, so the
# per-cross collections are still shared with dup_samples. Combine with `+`
# (which builds a new object) rather than `+=`, which would extend a list in
# place and so grow dup_samples every time this cell is re-run.
sample_exclusions = dup_samples.copy()
for cross in excessive_recomb_samples:
    sample_exclusions[cross] = sample_exclusions[cross] + excessive_recomb_samples[cross]

callsets = load_callsets(COMBINED_CALLSET_FN_TEMPLATE, 
                         sample_exclusions=sample_exclusions, 
                         variant_filter='FILTER_PASS',
                         call_filter=combined_conf_calls)

# sample names per cross, taken from the field names of the calldata array
samples = {cross: callsets[cross]['calldata'].dtype.names
           for cross in CROSSES}
# the first two samples of each cross are skipped to leave the progeny
# (the loader log lists the two parental clones first)
progeny = {cross: samples[cross][2:] for cross in CROSSES}
n_progeny = {cross: len(progeny[cross]) for cross in CROSSES}
print(n_progeny)
print(np.sum(list(n_progeny.values())))


2016-03-15 22:53:54.068230 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/3d7_hb3.combined.final.npz
2016-03-15 22:53:54.338037 :: filter variants: excluding 157 (0.4%) retaining 42087 (99.6%) of 42244 variants
2016-03-15 22:53:54.354424 :: filter samples: excluding ['C01/PG0062-C/ERR019070', 'C02/PG0053-C/ERR019067', 'C02/PG0055-C/ERR019066', 'C02/PG0056-C/ERR019068'] including ['3D7/PG0051-C/ERR019061', 'HB3/PG0052-C/ERR019054', 'C01/PG0065-C/ERR019064', 'C02/PG0067-C/ERR019073', 'C03/PG0066-C/ERR019072', 'C04/PG0061-C/ERR019059', 'C05/PG0068-C/ERR019065', 'C06/PG0069-C/ERR019055', 'C07/PG0070-C/ERR019056', 'C08/PG0071-C/ERR019074', 'C09/PG0072-C/ERR019057', 'C10/PG0063-C/ERR019060', 'C11/PG0064-C/ERR019071', 'C12/PG0058-C/ERR019063', 'C13/PG0054-C/ERR019062', 'C14/PG0060-C/ERR019058', 'C15/PG0057-C/ERR019069']
2016-03-15 22:53:54.398137 :: filter calls: excluding 2216 (0.3%) retaining 713263 (99.7%) of 715479 calls
2016-03-15 22:53:54.399026 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/hb3_dd2.combined.final.npz
2016-03-15 22:53:54.743970 :: filter variants: excluding 450 (1.2%) retaining 36461 (98.8%) of 36911 variants
2016-03-15 22:53:54.761904 :: filter samples: excluding ['SC01/PG0025-C/ERR019045'] including ['HB3/PG0004-CW/ERR012788', 'DD2/PG0008-CW/ERR012840', '1BB5/PG0023-C/ERR015449', '3BA6/PG0022-Cx/ERR126027', '3BD5/PG0024-C/ERR019053', '7C101/PG0074-C/ERR019048', '7C111/PG0038-C/ERR015457', '7C12/PG0035-Cx/ERR037704', '7C126/PG0047-C/ERR015452', '7C140/PG0039-C/ERR015454', '7C159/PG0040-Cx/ERR107475', '7C16/PG0036-C/ERR015455', '7C170/PG0041-C/ERR015446', '7C183/PG0042-C/ERR015448', '7C188/PG0030-C/ERR019046', '7C20/PG0037-C/ERR015451', '7C3/PG0034-C/ERR019047', '7C408/PG0031-C/ERR015458', '7C421/PG0043-C/ERR015459', '7C424/PG0044-C/ERR019043', '7C46/PG0046-Cx/ERR107476', '7C7/PG0048-C/ERR019049', 'B1SD/PG0015-C/ERR019044', 'B4R3/PG0018-C/ERR019042', 'CH3_116/PG0032-Cx/ERR037703', 'CH3_61/PG0033-Cx/ERR175544', 'D43/PG0029-Cx/ERR107474', 'GC03/PG0021-C/ERR015447', 'GC06/PG0028-C/ERR015456', 'QC01/PG0017-C/ERR019050', 'QC13/PG0016-C/ERR012895', 'QC23/PG0045-C/ERR012892', 'QC34/PG0026-C/ERR015453', 'SC05/PG0019-C/ERR019051', 'TC05/PG0027-C/ERR015450', 'TC08/PG0020-C/ERR019052']
2016-03-15 22:53:54.824021 :: filter calls: excluding 28934 (2.2%) retaining 1283662 (97.8%) of 1312596 calls
2016-03-15 22:53:54.825824 :: loading /data/plasmodium/pfalciparum/pf-crosses/data/public/20141022/7g8_gb4.combined.final.npz
2016-03-15 22:53:55.168268 :: filter variants: excluding 304 (0.9%) retaining 34471 (99.1%) of 34775 variants
2016-03-15 22:53:55.187280 :: filter samples: excluding ['AUD/PG0112-CW/ERR045639', 'JC9/PG0111-C/ERR029409', 'JE11/PG0100-CW/ERR045630', 'JF6/PG0079-CW/ERR045637', 'KB8/PG0104-CW/ERR045642', 'LA10/PG0086-CW/ERR045629', 'NIC/PG0095-C/ERR027107', 'QF5/PG0078-CW/ERR045638', 'XD8/PG0105-CW/ERR045628', 'XF12/PG0102-CW/ERR045635', 'D2/PG0094-CW/ERR045632'] including ['7G8/PG0083-C/ERR027099', 'GB4/PG0084-C/ERR027100', 'AL2/PG0103-CW/ERR045627', 'AUD/PG0112-C/ERR029406', 'DAN/PG0098-C/ERR027110', 'DEV/PG0081-CW/ERR045633', 'JB12/PG0099-C/ERR029146', 'JB8/PG0087-C/ERR029091', 'JC3/PG0077-CW/ERR045636', 'JC9/PG0111-CW/ERR045634', 'JE11/PG0100-C/ERR029404', 'JF6/PG0079-C/ERR027102', 'JON/PG0107-C/ERR029408', 'KA6/PG0091-C/ERR027117', 'KB8/PG0104-C/ERR029148', 'KH7/PG0088-C/ERR027111', 'LA10/PG0086-C/ERR029090', 'NF10/PG0096-C/ERR027108', 'NIC/PG0095-CW/ERR045631', 'QF5/PG0078-C/ERR029092', 'TF1/PG0080-C/ERR027103', 'WC4/PG0082-C/ERR029093', 'WE2/PG0085-C/ERR027101', 'WF12/PG0097-C/ERR027109', 'XB3/PG0093-C/ERR029105', 'XD8/PG0105-C/ERR029144', 'XE7/PG0106-C/ERR029407', 'XF12/PG0102-C/ERR029143', 'XG10/PG0109-C/ERR029405']
2016-03-15 22:53:55.239571 :: filter calls: excluding 12209 (1.2%) retaining 987450 (98.8%) of 999659 calls
{'7g8_gb4': 27, '3d7_hb3': 15, 'hb3_dd2': 34}
76

How many CO events in total?


In [3]:
# CO events table, read from a pickle in the public data directory
co_pickle_path = os.path.join(PUBLIC_DIR, 'tbl_co.pickle')
tbl_co = etl.frompickle(co_pickle_path)
display_with_nrows(tbl_co, caption='CO events')


CO events (1194 rows)
0|sample 1|chrom 2|co_pos_mid 3|co_pos_min 4|co_pos_max 5|co_pos_range 6|cross 7|co_from_parent 8|co_to_parent
B1SD/PG0015-C/ERR019044 b'Pf3D7_01_v3' 145052 144877 145227 350 hb3_dd2 hb3 dd2
GC03/PG0021-C/ERR015447 b'Pf3D7_01_v3' 163584 163145 164024 879 hb3_dd2 dd2 hb3
XF12/PG0102-C/ERR029143 b'Pf3D7_01_v3' 206769 205803 207736 1933 7g8_gb4 gb4 7g8
7C159/PG0040-Cx/ERR107475 b'Pf3D7_01_v3' 206905 206074 207736 1662 hb3_dd2 hb3 dd2
CH3_61/PG0033-Cx/ERR175544 b'Pf3D7_01_v3' 206905 206074 207736 1662 hb3_dd2 dd2 hb3

...


In [4]:
# number of CO events in each cross, plus the average per progeny clone
tbl_co_by_cross = (
    tbl_co
    .valuecounts('cross')
    .addfield('count_per_meiosis',
              lambda rec: rec['count'] / n_progeny[rec['cross']])
)
tbl_co_by_cross.display(caption='CO events by cross')


CO events by cross
0|cross 1|count 2|frequency 3|count_per_meiosis
hb3_dd2 544 0.4556113902847571 16.0
7g8_gb4 385 0.3224455611390285 14.25925925925926
3d7_hb3 265 0.2219430485762144 17.666666666666668

CO recombination rate


In [5]:
# The simple method: total map length is estimated as the mean number of
# crossovers per progeny clone, with standard error sqrt(mean / n).
X = tbl_co.valuecounts('sample').values('count').list()
n = len(X)
# a clone with no CO events at all would be missing from the value counts,
# so check that every progeny clone is represented before averaging
assert np.sum(list(n_progeny.values())) == n

mu_hat = np.mean(X)
mu_stderr = np.sqrt(mu_hat / n)
mu_95ci = 1.96 * mu_stderr  # half-width of the 95% interval

print('meioses:', n)
print('crossovers:', np.sum(X))
print('total map length: %.2f' % mu_hat, 'Morgan')
print('map length stderr:', mu_stderr)
print('map length 95%% CI: %.2f - %.2f' % (mu_hat - mu_95ci, mu_hat + mu_95ci))


meioses: 76
crossovers: 1194
total map length: 15.71 Morgan
map length stderr: 0.454661909031
map length 95% CI: 14.82 - 16.60

In [6]:
# Marker span per chromosome, taken from the accessibility regions (fairer
# and simpler): from the lowest start to the highest stop of the regions
# typed 'Core' on each chromosome.
core_regions = tbl_regions_1b.eq('region_type', 'Core')
tbl_marker_span = (
    core_regions
    .aggregate(key='region_chrom', 
               aggregation={'start': ('region_start', min), 'stop': ('region_stop', max)})
    .rename('region_chrom', 'chrom')
    .addfield('span', lambda rec: rec['stop'] - rec['start'])
)
total_marker_span = tbl_marker_span.values('span').sum()
span_caption = 'Core genome span (total = %.2f)' % (total_marker_span/1e6)
tbl_marker_span.displayall(caption=span_caption)


Core genome span (total = 21.16)
0|chrom 1|start 2|stop 3|span
Pf3D7_01_v3 92901 575900 482999
Pf3D7_02_v3 105801 862500 756699
Pf3D7_03_v3 70631 1003060 932429
Pf3D7_04_v3 91421 1143990 1052569
Pf3D7_05_v3 37901 1321390 1283489
Pf3D7_06_v3 72351 1294830 1222479
Pf3D7_07_v3 77101 1381600 1304499
Pf3D7_08_v3 73561 1365730 1292169
Pf3D7_09_v3 79101 1473560 1394459
Pf3D7_10_v3 68971 1571815 1502844
Pf3D7_11_v3 110001 2003320 1893319
Pf3D7_12_v3 60301 2163700 2103399
Pf3D7_13_v3 74414 2791900 2717486
Pf3D7_14_v3 35775 3255710 3219935

In [7]:
# Convert the map length (Morgan) into a rate over the core genome span.
span_mbp = total_marker_span / 1e6
co_hat = mu_hat / span_mbp
co_95ci_lower = (mu_hat - mu_95ci) / span_mbp
co_95ci_upper = (mu_hat + mu_95ci) / span_mbp
print('CO rate: %.4f Morgan/Mb' % co_hat)
print('CO rate 95%% CI: %.4f - %.4f Morgan/Mb' % (co_95ci_lower, co_95ci_upper))
print()
# 1 Morgan/Mb is 100 cM per 1000 kb, hence kb/cM = 10 / (Morgan/Mb).
# Taking the reciprocal swaps the lower and upper bounds of the interval.
print('CO rate: %.1f kb/cM' % (10/co_hat))
print('CO rate 95%% CI: %.1f - %.1f kb/cM' % (10/co_95ci_upper, 10/co_95ci_lower))


CO rate: 0.7425 Morgan/Mb
CO rate 95% CI: 0.7004 - 0.7846 Morgan/Mb

CO rate: 13.5 kb/cM
CO rate 95% CI: 12.7 - 14.3 kb/cM

CO rate by progeny


In [8]:
# CO count per progeny clone; show the five largest and the five smallest
# counts (the value counts come out ordered from highest to lowest)
tbl_co_by_sample = tbl_co.valuecounts('sample', 'cross')
tbl_co_by_sample.head(5).display()
tbl_co_by_sample.tail(5).display()


0|sample 1|cross 2|count 3|frequency
C04/PG0061-C/ERR019059 3d7_hb3 26 0.021775544388609715
7C408/PG0031-C/ERR015458 hb3_dd2 25 0.020938023450586266
SC05/PG0019-C/ERR019051 hb3_dd2 23 0.019262981574539362
CH3_61/PG0033-Cx/ERR175544 hb3_dd2 22 0.018425460636515914
C14/PG0060-C/ERR019058 3d7_hb3 22 0.018425460636515914
0|sample 1|cross 2|count 3|frequency
JC9/PG0111-CW/ERR045634 7g8_gb4 9 0.007537688442211055
B4R3/PG0018-C/ERR019042 hb3_dd2 8 0.006700167504187605
WE2/PG0085-C/ERR027101 7g8_gb4 8 0.006700167504187605
7C16/PG0036-C/ERR015455 hb3_dd2 7 0.005862646566164154
JON/PG0107-C/ERR029408 7g8_gb4 5 0.0041876046901172526

CO rate by cross


In [9]:
# the simple method, applied to each cross separately...
# N.B., the per-cross values use `cross_`-prefixed names so that this loop
# does not overwrite the genome-wide `n`, `mu_hat` and `mu_95ci`, which the
# CO rate cell reads. Re-running that cell after this one would otherwise
# silently report the rate for the last cross only.

for cross in CROSSES:
    
    print()
    print(LABELS[cross])
    cross_counts = tbl_co.eq('cross', cross).valuecounts('sample').values('count').list()

    # every progeny clone of the cross must contribute a CO count
    cross_n = len(cross_counts)
    assert n_progeny[cross] == cross_n
    print('meioses:', cross_n)

    print('crossovers:', np.sum(cross_counts))

    # mean number of crossovers per progeny clone
    cross_mu_hat = np.mean(cross_counts)
    print('total map length: %.2f' % cross_mu_hat, 'Morgan')

    cross_mu_stderr = np.sqrt(cross_mu_hat / cross_n)
    print('map length stderr:', cross_mu_stderr)

    # half-width of the 95% interval
    cross_mu_95ci = 1.96 * cross_mu_stderr
    print('map length 95%% CI: %.2f - %.2f' % (cross_mu_hat - cross_mu_95ci, cross_mu_hat + cross_mu_95ci))


3D7 x HB3
meioses: 15
crossovers: 265
total map length: 17.67 Morgan
map length stderr: 1.08525470641
map length 95% CI: 15.54 - 19.79

HB3 x Dd2
meioses: 34
crossovers: 544
total map length: 16.00 Morgan
map length stderr: 0.68599434057
map length 95% CI: 14.66 - 17.34

7G8 x GB4
meioses: 27
crossovers: 385
total map length: 14.26 Morgan
map length stderr: 0.726719143346
map length 95% CI: 12.83 - 15.68

In [10]:
# CO count per progeny clone as a DataFrame, ordered by cross then sample
tbl_co_by_cross_sample = (
    tbl_co
    .valuecounts('cross', 'sample')
    .sort(key=('cross', 'sample'))
    .rename('count', 'n_co')
)
df_co_by_sample = tbl_co_by_cross_sample.todataframe()
df_co_by_sample.head()


Out[10]:
cross sample n_co frequency
0 3d7_hb3 C01/PG0065-C/ERR019064 15 0.012563
1 3d7_hb3 C02/PG0067-C/ERR019073 15 0.012563
2 3d7_hb3 C03/PG0066-C/ERR019072 17 0.014238
3 3d7_hb3 C04/PG0061-C/ERR019059 26 0.021776
4 3d7_hb3 C05/PG0068-C/ERR019065 11 0.009213

In [11]:
# Compare CO counts per progeny clone between crosses, once with a one-way
# ANOVA and once with the Kruskal-Wallis test.
vals = [df_co_by_sample.loc[df_co_by_sample.cross == cross, 'n_co'].values
        for cross in CROSSES]

f_val, p_val = stats.f_oneway(*vals)
print('ANOVA', f_val, p_val)

h_val, p_val = stats.kruskal(*vals)
print('Kruskal', h_val, p_val)


ANOVA 3.20793043117 0.0462029646224
Kruskal 5.53134398344 0.0629337941466

In [12]:
# sanity check: one row is expected per progeny clone (76 over the three crosses)
len(df_co_by_sample)


Out[12]:
76

In [14]:
# NOTE(review): leftover interactive help lookup -- delete this cell before publishing
plt.boxplot?

In [26]:
def plot_co_rate_by_cross(ax):
    """Draw notched boxplots of the CO count per progeny clone, one box per cross.

    Reads the module-level `df_co_by_sample` (columns `n_co` and `cross`),
    `CROSSES` (order of the boxes) and `LABELS` (tick label for each cross).

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.

    """
    n_co = df_co_by_sample.n_co
    cross_of = df_co_by_sample.cross
    vals = [n_co[cross_of == cross] for cross in CROSSES]
    # a single call removes the top/right spines and offsets the remaining ones
    sns.despine(ax=ax, offset=5)
    # NOTE(review): the notches are bootstrapped, so they may differ slightly
    # between runs unless the NumPy random seed is fixed -- confirm
    ax.boxplot(vals, notch=True, bootstrap=10000, medianprops=dict(linewidth=1))
    ax.set_xticklabels([LABELS[cross] for cross in CROSSES], fontsize=8)
    ax.set_ylim(0, 30)
    ax.set_ylabel('genetic map length (Morgan)')

In [27]:
# Figure 3A: square panel, 8/3 inches on each side
width = height = 8/3
fig, ax = plt.subplots(figsize=(width, height))
# the x label is drawn in white
# (presumably invisible on purpose, to keep the layout in line with panels B and C -- TODO confirm)
ax.set_xlabel('cross', color='w')
plot_co_rate_by_cross(ax)
ax.set_title('A', fontweight='bold')
fig.tight_layout()
fig.savefig('../../artwork/main/fig3A.jpg', dpi=900, jpeg_quality=100)


CO rate by chromosome


In [15]:
# lookup from chromosome name to its core span
lkp_span = tbl_marker_span.lookupone('chrom', 'span')
lkp_span


Out[15]:
{'Pf3D7_01_v3': 482999,
 'Pf3D7_02_v3': 756699,
 'Pf3D7_03_v3': 932429,
 'Pf3D7_04_v3': 1052569,
 'Pf3D7_05_v3': 1283489,
 'Pf3D7_06_v3': 1222479,
 'Pf3D7_07_v3': 1304499,
 'Pf3D7_08_v3': 1292169,
 'Pf3D7_09_v3': 1394459,
 'Pf3D7_10_v3': 1502844,
 'Pf3D7_11_v3': 1893319,
 'Pf3D7_12_v3': 2103399,
 'Pf3D7_13_v3': 2717486,
 'Pf3D7_14_v3': 3219935}

In [16]:
# Count CO events per (cross, sample, chromosome) in a single pass over
# tbl_co, rather than re-scanning the whole table once for every
# sample/chromosome combination.
co_counts = {}
for key in tbl_co.values(('cross', 'sample', 'chrom')):
    co_counts[key] = co_counts.get(key, 0) + 1

# One row per sample and chromosome, including chromosomes on which the
# sample has no CO event (count 0).
rows_co_by_chrom = [['cross', 'sample', 'chrom', 'count']]
for cross, sample in sorted({(cross, sample) for cross, sample, _ in co_counts}):
    for chrom in CHROMOSOMES:
        rows_co_by_chrom.append([cross, sample, chrom, co_counts.get((cross, sample, chrom), 0)])

# lkp_span is keyed by str whereas the chromosome names here are bytes
tbl_co_by_chrom = (etl
    .wrap(rows_co_by_chrom)
    .addfield('span', lambda row: lkp_span[str(row.chrom, 'ascii')])
)
tbl_co_by_chrom.display()


0|cross 1|sample 2|chrom 3|count 4|span
3d7_hb3 C01/PG0065-C/ERR019064 b'Pf3D7_01_v3' 0 482999
3d7_hb3 C01/PG0065-C/ERR019064 b'Pf3D7_02_v3' 0 756699
3d7_hb3 C01/PG0065-C/ERR019064 b'Pf3D7_03_v3' 1 932429
3d7_hb3 C01/PG0065-C/ERR019064 b'Pf3D7_04_v3' 0 1052569
3d7_hb3 C01/PG0065-C/ERR019064 b'Pf3D7_05_v3' 1 1283489

...


In [17]:
# DataFrame version of the per-sample, per-chromosome CO counts
df_co_by_chrom = tbl_co_by_chrom.todataframe()

In [18]:
# Mean CO count per chromosome against chromosome span, one panel per cross.
# The variables are passed by keyword because newer seaborn releases no
# longer accept them positionally; keywords work in older releases as well.
plot = sns.lmplot(x='span', y='count', data=df_co_by_chrom, x_estimator=np.mean, col='cross')
plot.set(ylim=(0, 4));



In [19]:
def plot_co_rate_by_chrom(ax, scatter_kws=None):
    """Plot mean CO count per chromosome against chromosome span, with a regression fit.

    Reads the module-level `df_co_by_chrom` (columns `span` and `count`).

    Parameters
    ----------
    ax : matplotlib axes
        Axes to draw on.
    scatter_kws : dict, optional
        Extra keyword arguments for the scatter points, passed on to
        `sns.regplot`.

    """
    # None rather than a dict as the default, so no mutable object is shared between calls
    if scatter_kws is None:
        scatter_kws = {}
    # Replaces despine() followed by the deprecated sns.offset_spines();
    # offset=10 is intended to match offset_spines' default -- TODO confirm
    # against the seaborn version in use.
    sns.despine(ax=ax, offset=10)
    # variables passed by keyword: newer seaborn no longer accepts them positionally
    sns.regplot(x='span', y='count', data=df_co_by_chrom, x_estimator=np.mean,
                scatter_kws=scatter_kws, ax=ax)
    ax.set_xlabel('chromosome marker span (bp)')
    ax.set_ylabel('genetic map length (Morgan)')
    ax.set_ylim(0, 3)

In [20]:
# Figure 3B: square panel, 8/3 inches on each side
width = height = 8/3
fig, ax = plt.subplots(figsize=(width, height))
plot_co_rate_by_chrom(ax, scatter_kws={'s': 12})
# relabel the x axis in Mbp: a tick every 1,000,000 bp, labelled 0 to 3
ax.set_xticks(range(0, 3500000, 1000000))
ax.set_xticklabels(range(0, 4, 1))
ax.set_xlabel('chromosome marker span (Mbp)')
ax.set_title('B', fontweight='bold')
fig.tight_layout()
fig.savefig('../../artwork/main/fig3B.jpg', dpi=900, jpeg_quality=100)


/usr/local/lib/python3.5/dist-packages/seaborn/utils.py:260: UserWarning: `offset_spines` is deprecated and will be removed in v0.5
  warnings.warn(warn_msg, UserWarning)

CO rate relative to centromere

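TODO: redo this analysis with newer genome annotations, which should provide the centromere that is missing here (chromosome 10 is excluded below because it has no annotated centromere).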


In [21]:
def distance_to_centromere(row):
    """Return the absolute distance from a CO mid-point to its chromosome's centromere.

    `row` needs a bytes `chrom` attribute and a `co_pos_mid` attribute.
    Characters 6-7 of the chromosome name (e.g. b'01' in b'Pf3D7_01_v3') select
    the 'PF3D7_CENnn' entry of the module-level `lkp_feature`; the centromere
    position is the mid-point of that feature's start and stop.
    """
    chrom_number = str(row.chrom[6:8], 'ascii')
    centromere = lkp_feature['PF3D7_CEN' + chrom_number]
    centromere_mid = (centromere['feature_start'] + centromere['feature_stop'])/2
    return abs(row.co_pos_mid - centromere_mid)

# CO events usable for the centromere analysis
tbl_co_cen = (
    tbl_co
    .lt('co_pos_range', 10000)  # require 10kb certainty
    .ne('chrom', b'Pf3D7_10_v3')  # no centromere
)
df_co_cen = tbl_co_cen.todataframe()
# distance from each CO mid-point to the centromere of its chromosome
df_co_cen['cen_dist'] = df_co_cen.apply(distance_to_centromere, axis=1)
print(len(df_co_cen))
df_co_cen.head()


1087
Out[21]:
sample chrom co_pos_mid co_pos_min co_pos_max co_pos_range cross co_from_parent co_to_parent cen_dist
0 B1SD/PG0015-C/ERR019044 b'Pf3D7_01_v3' 145052 144877 145227 350 hb3_dd2 hb3 dd2 314069
1 GC03/PG0021-C/ERR015447 b'Pf3D7_01_v3' 163584 163145 164024 879 hb3_dd2 dd2 hb3 295537
2 XF12/PG0102-C/ERR029143 b'Pf3D7_01_v3' 206769 205803 207736 1933 7g8_gb4 gb4 7g8 252352
3 7C159/PG0040-Cx/ERR107475 b'Pf3D7_01_v3' 206905 206074 207736 1662 hb3_dd2 hb3 dd2 252216
4 CH3_61/PG0033-Cx/ERR175544 b'Pf3D7_01_v3' 206905 206074 207736 1662 hb3_dd2 dd2 hb3 252216

In [22]:
def get_cen_pos(chrom):
    """Return `(feature_start, feature_stop)` of the centromere for `chrom`.

    `chrom` is a str chromosome name; characters 6-7 of it (e.g. '01' in
    'Pf3D7_01_v3') select the 'PF3D7_CENnn' entry of the module-level
    `lkp_feature`.
    """
    centromere = lkp_feature['PF3D7_CEN%s' % chrom[6:8]]
    start = centromere['feature_start']
    stop = centromere['feature_stop']
    return start, stop

# Core regions, leaving out chromosome 10
core_regions_with_cen = (
    tbl_regions_1b
    .ne('region_chrom', 'Pf3D7_10_v3')
    .eq('region_type', 'Core')
)
# Per chromosome: extent of the core genome plus centromere start, stop and mid-point
tbl_chrom_cen = (
    core_regions_with_cen
    .aggregate(key='region_chrom', 
               aggregation={'core_start': ('region_start', min), 'core_stop': ('region_stop', max)})
    .rename('region_chrom', 'chrom')
    .addfield('cen', lambda rec: get_cen_pos(rec.chrom))
    .unpack('cen', ['cen_start', 'cen_stop'])
    .addfield('cen_pos', lambda rec: (rec.cen_start + rec.cen_stop)/2)
)
tbl_chrom_cen


Out[22]:
0|chrom 1|core_start 2|core_stop 3|cen_start 4|cen_stop 5|cen_pos
Pf3D7_01_v3 92901 575900 457931 460311 459121.0
Pf3D7_02_v3 105801 862500 447300 450450 448875.0
Pf3D7_03_v3 70631 1003060 597816 600275 599045.5
Pf3D7_04_v3 91421 1143990 642003 644529 643266.0
Pf3D7_05_v3 37901 1321390 455740 457252 456496.0

...


In [23]:
def plot_co_rate_cen(window_size=20000, ax=None, scatter_kws=None):
    """Plot CO recombination rate against distance from the centromere.

    For every chromosome in the module-level `tbl_chrom_cen`, and for each
    side of the centromere separately, the CO events in `df_co_cen` are
    counted in consecutive windows of `window_size` bp moving away from the
    centromere. Counts are converted to Morgan/Mbp using the total number of
    progeny in `n_progeny`, and windows from all chromosome arms are pooled
    by distance for plotting.

    Parameters
    ----------
    window_size : int, optional
        Window width in bp.
    ax : matplotlib axes, optional
        Axes to draw on; a new figure is created if omitted.
    scatter_kws : dict, optional
        Extra keyword arguments for the scatter points, passed on to
        `sns.regplot`.

    """
    # None rather than a dict as the default, so no mutable object is shared between calls
    if scatter_kws is None:
        scatter_kws = {}

    XX = []  # distance to centromere (window mid-points)
    YY = []  # CO counts per window, converted to a rate below

    # N.B., we need to account for the fact that some chromosomes are bigger than others.
    # Consider each chromosome separately, move upstream and downstream of centromere separately.
    # Gather what data there are.
    # Should mean that estimates of recombination rate further from centromere have greater uncertainty.
    # However should not mean there is any other effect of chromosome size.
    for row in tbl_chrom_cen.records():
        on_chrom = df_co_cen[df_co_cen.chrom == row.chrom.encode('ascii')]

        # (arm length, COs lying entirely within that arm); a CO whose
        # uncertainty interval spans the centromere falls in neither arm
        arms = (
            # downstream
            (row.core_stop - row.cen_pos, on_chrom.co_pos_min > row.cen_pos),
            # upstream
            (row.cen_pos - row.core_start, on_chrom.co_pos_max < row.cen_pos),
        )
        for arm_length, in_arm in arms:
            bins = np.arange(0, arm_length, window_size)
            if len(bins) < 2:
                # arm shorter than one full window: nothing to count
                continue
            XX.append((bins[:-1] + bins[1:])/2.)
            counts, _ = np.histogram(on_chrom[in_arm].cen_dist, bins=bins)
            YY.append(counts)

    XX = np.concatenate(XX)
    YY = np.concatenate(YY)
    YY = YY / np.sum(list(n_progeny.values()))  # Morgan
    YY = YY / (window_size/1e6)  # Morgan/Mbp

    if ax is None:
        fig, ax = plt.subplots()
    sns.despine(ax=ax, offset=5)
    # data passed by keyword: newer seaborn no longer accepts them positionally
    sns.regplot(x=XX, y=YY, x_estimator=np.mean, x_ci=95, ax=ax, fit_reg=False, scatter_kws=scatter_kws)
    ax.set_xlim(0, 700000)
    ax.set_ylim(0, 2)
    ax.set_ylabel('CO recombination rate (Morgan/Mbp)')
    ax.set_xlabel('distance from centromere (bp)')
    # dotted line: mean rate over all windows
    ax.axhline(np.mean(YY), linestyle=':', linewidth=.5)

In [24]:
# Figure 3C: square panel, 8/3 inches on each side, using 30 kb windows
width = height = 8/3
fig, ax = plt.subplots(figsize=(width, height))
plot_co_rate_cen(30000, ax=ax, scatter_kws={'s': 12})
# override the function's axis settings: show up to 510 kb, labelled in kbp
ax.set_xlim(0, 510000)
ax.set_xticks(range(0, 600000, 100000))
ax.set_xticklabels(range(0, 600, 100))
ax.set_xlabel('distance from centromere (kbp)')
ax.set_title('C', fontweight='bold')
fig.tight_layout()
fig.savefig('../../artwork/main/fig3C.jpg', dpi=900, jpeg_quality=100)



In [25]:
# NOTE(review): scratch calculation -- presumably a width of 8 inches converted to cm; confirm or delete
8 * 2.54


Out[25]:
20.32

In [26]:
# NOTE(review): scratch calculation -- presumably two 8/3 inch panels converted to cm; confirm or delete
(16/3)*2.54


Out[26]:
13.546666666666667

In [ ]: