In [1]:
%load_ext autoreload
%autoreload 2
from Bio import SeqIO
In [2]:
def report(seqrecord):
print(seqrecord)
print('Length: {0} n.t.'.format(len(seqrecord.seq)))
Load the Victoria PB2 sequence:
In [3]:
vicpb2 = SeqIO.read('../../data/victoria-pb2.fasta', 'fasta')
report(vicpb2)
Load the pCI sequence:
In [4]:
pCI = SeqIO.read('../../data/pCI.fasta', 'fasta')
report(pCI)
Define a split at the 1/3 and 2/3 mark of the Victoria PB2 and pCI sequences. This is a backup measure just in case full amplification does not work.
In [5]:
def splits(seqrecord, frac1):
return round(len(seqrecord.seq) * frac1)
vicpb2_split1 = splits(vicpb2, 1/3)
vicpb2_split1
vicpb2_split2 = splits(vicpb2, 2/3)
vicpb2_split2
Out[5]:
In [6]:
pCI_split1 = splits(pCI, 1/3)
pCI_split1
pCI_split2 = splits(pCI, 2/3)
pCI_split2
Out[6]:
Also define a 2/5 and 3/5 split mark for the Victoria PB2 sequence. (Based on the latest PCR results, the Assembly 4 split point doesn't work.)
In [7]:
vicpb2_split3 = splits(vicpb2, 2/5)
vicpb2_split4 = splits(vicpb2, 3/5)
print(vicpb2_split3, vicpb2_split4)
In [8]:
from Bio.SeqRecord import SeqRecord
def split_seqrecord(seqrecord, split_location):
split1 = SeqRecord(seqrecord.seq[0:split_location], id='{0}_part1'.format(seqrecord.id))
split2 = SeqRecord(seqrecord.seq[split_location:], id='{0}_part2'.format(seqrecord.id))
return split1, split2
# Create the split parts based on vicpb2 split 1 (this is the 1/3 mark)
vicpb2_split1_part1, vicpb2_split1_part2 = split_seqrecord(vicpb2, vicpb2_split1)
len(vicpb2_split1_part1), len(vicpb2_split1_part2)
Out[8]:
In [9]:
# Create the split parts based on vicpb2 split 2 (this is the 2/3 mark)
vicpb2_split2_part1, vicpb2_split2_part2 = split_seqrecord(vicpb2, vicpb2_split2)
len(vicpb2_split2_part1), len(vicpb2_split2_part2)
Out[9]:
In [10]:
# Create the split parts based on vicpb2 split 3 (this is the 2/5 mark)
vicpb2_split3_part1, vicpb2_split3_part2 = split_seqrecord(vicpb2, vicpb2_split3)
len(vicpb2_split3_part1), len(vicpb2_split3_part2)
Out[10]:
In [11]:
# Create the split parts based on vicpb2 split 4 (this is the 3/5 mark)
vicpb2_split4_part1, vicpb2_split4_part2 = split_seqrecord(vicpb2, vicpb2_split4)
len(vicpb2_split4_part1), len(vicpb2_split4_part2)
Out[11]:
In [12]:
# Create the split parts based on pCI split 1
pCI_split1_part1, pCI_split1_part2 = split_seqrecord(pCI, pCI_split1)
pCI_split1_part1, pCI_split1_part2
Out[12]:
In [13]:
# Create the split parts based on pCI split 2
pCI_split2_part1, pCI_split2_part2 = split_seqrecord(pCI, pCI_split2)
pCI_split2_part1, pCI_split2_part2
Out[13]:
Steps:
In [14]:
from mbtools.assembly import GibsonAssembler
In [15]:
# Assembly 1: Scenario where VicPB2 and pCI are amplified full-length.
assembly1 = GibsonAssembler([vicpb2, pCI])
for part, product in assembly1.pcr_products().items():
print(part, len(product))
In [16]:
# Assembly 2: Scenario where VicPB2 is split into two parts at the 1/3 point.
assembly2 = GibsonAssembler([vicpb2_split1_part1, vicpb2_split1_part2, pCI])
for part, product in assembly2.pcr_products().items():
print(part, len(product))
In [17]:
# Assembly 3: Scenario where VicPB2 and pCI are split each at their 1/3 points.
assembly3 = GibsonAssembler([vicpb2_split1_part1, vicpb2_split1_part2, pCI_split1_part1, pCI_split1_part2])
for part, product in assembly3.pcr_products().items():
print(part, len(product))
In [18]:
# Assembly 4: Scenario where VicPB2 and pCI are split each at their 2/3 points.
assembly4 = GibsonAssembler([vicpb2_split2_part1, vicpb2_split2_part2, pCI_split2_part1, pCI_split2_part2])
for part, product in assembly4.pcr_products().items():
print(part, len(product))
In [19]:
# Assembly 5: Scenario where we use the 2/5 split point for PB2 and 2/3 split point for pCI.
assembly5 = GibsonAssembler([vicpb2_split3_part1, vicpb2_split3_part2, pCI_split2_part1, pCI_split2_part2])
for part, product in assembly5.pcr_products().items():
print(part, len(product))
In [20]:
# Assembly 5: Scenario where we use the 3/5 split point for PB2 and 2/3 split point for pCI.
assembly6 = GibsonAssembler([vicpb2_split4_part1, vicpb2_split4_part2, pCI_split2_part1, pCI_split2_part2])
for part, product in assembly6.pcr_products().items():
print(part, len(product))
In [21]:
def gather_assembly_primers(assembly):
assembly_primers = []
for part, primers in assembly.primers().items():
for primer, sequence in primers.items():
prec = dict()
prec['part'] = part
prec['primer'] = primer
prec['sequence'] = str(sequence)
assembly_primers.append(prec)
return assembly_primers
assembly4_primers = gather_assembly_primers(assembly4)
In [22]:
assembly5_primers = gather_assembly_primers(assembly5)
assembly6_primers = gather_assembly_primers(assembly6)
Check the primers from Assembly 4 into the database.
In [23]:
from tinydb import TinyDB, Query
primerdb = TinyDB('../../data/primers.db.json')
In [24]:
def checkin_primers(primers, database):
"""
Checks the primers into the database.
"""
for primer in primers:
primer['received'] = False
p = Query()
if not database.contains(p.sequence == primer['sequence']):
database.insert(primer)
print('Inserting {primer}...'.format(primer=primer))
checkin_primers(assembly4_primers, primerdb)
In [25]:
checkin_primers(assembly5_primers, primerdb)
In [26]:
checkin_primers(assembly6_primers, primerdb)
In [27]:
len(primerdb)
Out[27]:
Some primers have already been received. Here is a function that marks them as "received".
In [28]:
def mark_unreceived_primers(database, eids):
"""
Marks a list of primers as not received.
"""
print('updating EIDs as being not received')
database.update({'received': False}, eids=eids)
for eid in eids:
assert database.get(eid=eid)['received'] == False
In [29]:
def mark_primers_as_received(database, eids):
assert isinstance(eids, list)
print('updating EIDs as being received')
database.update({'received':True}, eids=eids)
# Very quick check to make sure it is truly received.
for eid in eids:
assert database.get(eid=eid)['received'] == True
mark_primers_as_received(primerdb, list(range(25, 36)))
Export the primers to be ordered.
In [30]:
import pandas as pd
q = Query()
primers_to_order = primerdb.search(q.received == False)
primers_to_order
for p in primers_to_order:
eid = primerdb.get(Query().sequence == p['sequence']).eid
p['primer_id'] = 'EM-{0}'.format(eid)
print(p)
pd.DataFrame(primers_to_order).to_csv('primers.csv')
In [31]:
# Get primers out from each of the reactions.
a1_primers = assembly1.primers()
a1_primers
Out[31]:
In [32]:
# Match up the Fw_gibson and Re_gibson primers for each template.
len(a1_primers['pCI']['fw_gibson'])
Out[32]:
In [33]:
def pprint_primers(assembly):
primer_set = assembly.primers()
for tmplt, prmrs in primer_set.items():
print(tmplt)
for prmr, sqnc in prmrs.items():
if 'gibson' in prmr:
# print(prmr)
p = Query()
eid = primerdb.get(p.sequence == str(sqnc)).eid
print(' Primer: {0}'.format(prmr))
print(' Primer ID: EM-{0}'.format(eid))
print(' Tm: {0:.1f}ºC'.format(Tm_NN(sqnc)))
print(' Size: {0} bp'.format(len(assembly.pcr_products()[tmplt])))
print('')
In [34]:
from Bio.SeqUtils.MeltingTemp import Tm_Wallace, Tm_staluc, Tm_NN
pprint_primers(assembly1)
We will amplify Victoria PB2 using EM-2 and EM-11 on the mutagenesis kit, and amplify pCI using EM-6 and EM-15 using Phusion GC master-mix.
In [35]:
for part, seq in assembly4.pcr_products().items():
print(part, len(seq))
In [36]:
# The PCR protocol for pCI is dependent on the length of the PCR product.
bases_per_min = 2000
len(assembly1.pcr_products()['pCI']) / bases_per_min
Out[36]:
In [37]:
pprint_primers(assembly4)
In [38]:
for part, seq in assembly4.pcr_products().items():
print(part, len(seq))
In [ ]: