In [ ]:
# list: an ordered collection that may mix numbers and strings
my_name = 'Anne'
my_list = [2, 4, 6, 8]
my_list.append(my_name)  # the name becomes the last element
print(my_list)
# indexing starts at 0, so index 1 is the second element
print(my_list[1])
In [ ]:
# dictionary: maps a one-letter base code to the full base name
my_dict = dict(A='Adenine', C='Cytosine')
print(my_dict)
# values are looked up by key
print(my_dict['A'])
In [ ]:
# string: split a sequence into codons, then join them back together
seq = 'ATC CTG TAC TTT'
codons = seq.split()  # split on whitespace
new_seq = ','.join(codons)  # glue the codons together with commas
print(codons)
print(new_seq)
In [ ]:
# loop: visit every base of the sequence, one per line
seq = 'ATCCTGTACTT'
for base in seq:
    print(base)
In [ ]:
# condition: only report when the base is an adenine
base = 'A'
if base == 'A':
    print('found base A')
In [ ]:
# loop and condition combined
# Count how many bases of `seq` are G or C, then report the total.
# NOTE(review): the indentation of this cell was lost in the export, so it is
# unclear whether print(base) runs for every base or only for G/C matches —
# confirm against the original notebook before restoring the indentation.
seq = 'ATCCTGTACTT'
gc = 0  # running count of G/C bases
for base in seq:
if (base == 'G') or (base == 'C'):
gc += 1
print(base)
print('total number of GCs in the sequence', seq, 'is', gc)
In [ ]:
# file: count the G/C bases of a sequence and save the result as a tiny CSV
seq = 'ATCCTGTACTT'
gc = 0
for base in seq:
    if (base == 'G') or (base == 'C'):
        gc += 1

# 'w' creates the file, or overwrites it if it already exists
with open('my_file.txt', 'w') as out:
    out.write('seq,gc_content\n')
    # end the data row with a newline too, so the file does not stop
    # in the middle of a line
    out.write('{},{}\n'.format(seq, gc))
In [ ]:
# built-in functions: len() gives the number of characters in a string
seq = 'ATCCTGTACTT'
seq_length = len(seq)
print(seq_length)
In [ ]:
# your own one
def gc_content(seq):
    """Return the number of G and C bases in ``seq``.

    Despite the name, the result is an absolute count, not a fraction.
    The comparison is case-sensitive: only uppercase 'G' and 'C' are
    counted.

    Args:
        seq: An iterable of single-character bases, typically a string.

    Returns:
        The count of items equal to 'G' or 'C' (0 for an empty sequence).
    """
    return sum(1 for base in seq if base in ('G', 'C'))
seq = 'ATCCTGTACTT'
print(gc_content(seq))
print(gc_content('AAATCGATTTAAGGGG'))  # the function can be reused many times

# Write one "sequence,count" row for every line of seq.txt.
# The header row names the columns 'seq' and 'gc'; the csv.DictReader,
# csv.DictWriter and pandas cells further down look values up by these names.
with open('gc_content_data.csv', 'w') as out, open('seq.txt') as data:
    out.write('seq,gc\n')
    for line in data:
        seq = line.strip()  # drop the trailing newline
        out.write('{},{}\n'.format(seq, gc_content(seq)))
In [ ]:
# import math
# Load the standard-library math module, then list the names it defines.
import math
# dir() returns the list of attribute names of the module
dir(math)
In [ ]:
# os.path module: check whether a path is present on disk
import os.path
missing_file = 'my_file_that_does_not_exist.txt'
print(os.path.exists(missing_file))
In [ ]:
import os.path

# build the path portably instead of hard-coding the separator
seq_filename = os.path.join('data', 'seq.txt')
if not os.path.exists(seq_filename):
    print('file {} not found'.format(seq_filename))
else:
    # print every line of the file without its trailing newline
    with open(seq_filename) as data:
        for line in data:
            print(line.strip())
    # then show the directory part and the file-name part of the path
    print(os.path.dirname(seq_filename))
    print(os.path.basename(seq_filename))
In [ ]:
# csv module - reader
import csv
import os.path  # imported here so the cell also runs on its own

gc_content_filename = 'gc_content_data.csv'
if os.path.exists(gc_content_filename):
    # newline='' is what the csv documentation recommends when opening a
    # file for the csv module, so it can handle line endings itself
    with open(gc_content_filename, newline='') as data:
        reader = csv.reader(data, delimiter=',')
        # each row comes back as a list of strings, one per column
        for row in reader:
            print(row)
In [ ]:
import csv
import os.path  # imported here so the cell also runs on its own

gc_content_filename = 'gc_content_data.csv'
results = []
if os.path.exists(gc_content_filename):
    # newline='' is what the csv documentation recommends when opening a
    # file for the csv module
    with open(gc_content_filename, newline='') as data:
        reader = csv.DictReader(data, delimiter=',')
        # the first line of the file supplies the keys of every row
        results.extend(reader)

# each row is a dictionary keyed by column name (an OrderedDict on
# Python 3.6 and 3.7); only show the second row when there is one, so a
# missing or short file does not raise IndexError
if len(results) > 1:
    print(results[1])
for r in results:
    print('{}\t{}'.format(r['seq'], r['gc']))
In [ ]:
# csv module - writer
# newline='' stops the csv module's own line endings from being translated
# again by the file object, which would add blank lines between rows on Windows
with open('output.txt', 'w', newline='') as out:
    writer = csv.DictWriter(out, fieldnames=['seq', 'gc'], delimiter='\t')
    # no header row is written; call writer.writeheader() here to add one
    writer.writerows(results)
In [ ]:
# pandas module: load the CSV file into a table
import pandas

data = pandas.read_csv('gc_content_data.csv')
print(data)
# walk over the table row by row; the row label is not needed here
for _, record in data.iterrows():
    print(record['seq'], record['gc'])
In [ ]:
# save the table as comma-separated text, leaving out the row index
data.to_csv(
    'new_gc_content_data.csv',
    sep=',',
    index=False,
)
In [ ]:
# use this function and save it into a file called tools.py
def gc_content(seq):
    """Return the number of G and C bases in ``seq``.

    Despite the name, the result is an absolute count, not a fraction.
    The comparison is case-sensitive: only uppercase 'G' and 'C' are
    counted.

    Args:
        seq: An iterable of single-character bases, typically a string.

    Returns:
        The count of items equal to 'G' or 'C' (0 for an empty sequence).
    """
    return sum(1 for base in seq if base in ('G', 'C'))
In [ ]:
# Import the whole tools module; its functions are then reached through
# the module name.
import tools
print(tools.gc_content('AAATTTCCGG'))
In [ ]:
# Import only gc_content from tools, so it can be called without the
# module prefix.
from tools import gc_content
print(gc_content('AAATTTCCGG'))