This Jupyter notebook is intended to be used alongside the book Python for Bioinformatics
Note: Before opening the files, they must be accessible from this Jupyter notebook. To make them available, the following commands download them from GitHub and extract them into a directory called samples.
In [2]:
# Fetch the sample archive and unpack it into ./samples.
!curl https://raw.githubusercontent.com/Serulab/Py4Bio/master/samples/samples.tar.bz2 -o samples.tar.bz2
# -p keeps the cell re-runnable: no error if the directory already exists.
!mkdir -p samples
!tar xvfj samples.tar.bz2 -C samples
Once the previous commands have been executed, you can open the file.
In [28]:
# Open the file in read mode; the handle stays open until close() is called on it.
file_handle = open('samples/readme.txt', 'r')
In [29]:
# Evaluate the handle so the notebook displays its representation.
file_handle
Out[29]:
In [30]:
# Rebind file_handle to the seqA.fas sample and display its whole content as one string.
# NOTE(review): this cell never closes the handle it opens.
file_handle = open('samples/seqA.fas', 'r')
file_handle.read()
Out[30]:
In [31]:
# Open, read, and explicitly close the file.
# NOTE(review): close() is skipped if read() raises; a `with` block avoids that.
file_handle = open('samples/readme.txt', 'r')
# do something with the file
file_handle.read()  # the returned text is discarded here
file_handle.close()
In [32]:
# The context manager closes the file as soon as its text has been read.
with open('samples/readme.txt', 'r') as file_handle:
    readme_text = file_handle.read()
print(readme_text)
In [33]:
# Read the whole file inside the context manager, then print it once the file is closed.
with open('samples/seqA.fas', 'r') as file_handle:
    fasta_text = file_handle.read()
print(fasta_text)
Listing 5.1: firstread.py: First try to read a FASTA file
In [2]:
# Read the whole file at once, then separate the first line from the rest.
with open('samples/seqA.fas') as fh:
    my_file = fh.read()
# Split a single time: the first piece is the header line, the remainder is
# the sequence spread over one or more lines.
header, *body = my_file.split('\n')
name = header[1:]  # drop the first character of the header line
sequence = ''.join(body)
print('The name is : {0}'.format(name))
print('The sequence is: {0}'.format(sequence))
Listing 5.2: fastaRead.py: Reads FASTA file, sequentially
In [35]:
# Read the file line by line: the first line gives the name, every following
# line is appended to the sequence.
with open('samples/seqA.fas') as fh:
    # Strip only the newline (if any) and then the first character, so a
    # header without a trailing newline does not lose its last character.
    name = fh.readline().rstrip('\n')[1:]
    # Join the remaining lines without their newlines. Building the string
    # from an empty start avoids the stray leading space that a ' ' seed
    # would put in front of the sequence.
    sequence = ''.join(line.rstrip('\n') for line in fh)
print('The name is : {0}'.format(name))
print('The sequence is: {0}'.format(sequence))
Listing 5.3: netchargefile.py: Calculate the net charge, reading the input from a file
In [36]:
# Sum per-residue charge contributions for the sequence stored in a file and
# print the total.
charge = -0.002  # running total, starting from the listing's fixed offset
# Contribution of each listed letter; any other character adds 0 (see .get below).
aa_charge = {'C': -.045, 'D': -.999, 'E': -.998, 'H': .091,
             'K': 1, 'R': 1, 'Y': -.001}
with open('samples/prot.fas') as fh:
    fh.readline()  # skip the first line of the file
    # rstrip('\n') instead of line[:-1]: a final line without a trailing
    # newline would otherwise lose its last residue.
    sequence = ''.join(line.rstrip('\n').upper() for line in fh)
for aa in sequence:
    charge += aa_charge.get(aa, 0)
print(charge)
In [37]:
# Opening in 'w' mode creates the file (or truncates an existing one).
# The with-block closes the handle right away instead of leaving it open.
with open('samples/newfile.txt', 'w') as fh:
    pass
In [38]:
# Opening in 'a' mode creates the file if needed and positions writes at its end.
# The with-block closes the handle right away instead of leaving it open.
with open('samples/error.log', 'a') as fh:
    pass
Listing 5.4: Newfile.py: Write numbers to a file.
In [39]:
# Write five lines, one number each, in a single call; the last line has no
# trailing newline.
with open('samples/numbers.txt', 'w') as fh:
    payload = '1\n2\n3\n4\n5'
    fh.write(payload)
Listing 5.5: nettofile.py: Net charge calculation, saving results in a file
In [40]:
# Sum per-residue charge contributions for the sequence stored in a file and
# save the total, as text, in samples/out.txt.
charge = -0.002  # running total, starting from the listing's fixed offset
# Contribution of each listed letter; any other character adds 0 (see .get below).
aa_charge = {'C': -.045, 'D': -.999, 'E': -.998, 'H': .091,
             'K': 1, 'R': 1, 'Y': -.001}
with open('samples/prot.fas') as fh:
    next(fh)  # skip the first line of the file
    # Build the sequence from the remaining lines. rstrip('\n') replaces
    # line[:-1] so a final line without a newline keeps its last residue,
    # and starting from an empty string avoids a stray leading space.
    sequence = ''.join(line.rstrip('\n').upper() for line in fh)
for aa in sequence:
    charge += aa_charge.get(aa, 0)
with open('samples/out.txt', 'w') as file_out:
    file_out.write(str(charge))
Listing 5.6: csvwocsv.py: Reading data from a CSV file
In [6]:
# Average the integer found in the second comma-separated field of every
# line after the first one.
total_len = 0
n = -1  # stays -1 when there are no lines after the first, so the count below is 0
with open('samples/B1.csv') as fh:
    next(fh)  # skip the first line
    for n, line in enumerate(fh):
        data = line.split(',')
        total_len += int(data[1])
row_count = n + 1
if row_count:
    print(total_len / row_count)
else:
    # Previously this case ended in a NameError because n was never bound.
    print('No data rows found in samples/B1.csv')
Listing 5.7: csv1.py: Reading data from a CSV file, using csv module
In [5]:
import csv

# Average the integer found in the second field of every row after the first
# one, letting the csv module do the parsing.
total_len = 0
n = -1  # stays -1 when there are no rows after the first, so the count below is 0
# newline='' is what the csv module asks for when it is handed a file object;
# the with-block closes the file once all rows have been consumed.
with open('samples/B1.csv', newline='') as fh:
    lines = csv.reader(fh)
    next(lines)  # skip the first row
    for n, line in enumerate(lines):
        total_len += int(line[1])
row_count = n + 1
if row_count:
    print(total_len / row_count)
else:
    # Previously this case ended in a NameError because n was never bound.
    print('No data rows found in samples/B1.csv')
In [4]:
# Load every row into a list of lists so cells can be addressed as data[row][column].
# The with-block closes the file once the rows have been materialised;
# newline='' is what the csv module asks for when it is handed a file object.
with open('samples/B1.csv', newline='') as fh:
    data = list(csv.reader(fh))
# Third field of the first row.
data[0][2]
Out[4]:
In [5]:
# Second field of the row at index 1 of `data`.
data[1][1]
Out[5]:
In [7]:
# Third field of the row at index 1 of `data`.
data[1][2]
Out[7]:
In [8]:
# First field of the row at index 3 of `data`.
data[3][0]
Out[8]:
In [9]:
# Parse a colon-separated file by overriding the delimiter.
# NOTE(review): the file object handed to csv.reader is never closed; rows are
# produced on demand, so the file has to stay open while `rows` is consumed.
rows = csv.reader(open('/etc/passwd'), delimiter=':')
In [7]:
# Select a parsing dialect by name instead of spelling out individual options.
# NOTE(review): the file object handed to csv.reader is never closed; rows are
# produced on demand, so the file has to stay open while `rows` is consumed.
rows = csv.reader(open('samples/data.csv'), dialect='excel')
In [11]:
# Let csv.Sniffer guess the dialect from the file's content, then parse the
# same file with it and show the first two rows.
# A single handle is opened (and closed by the with-block) instead of two
# handles that were never closed.
with open('samples/data.csv', newline='') as fh:
    dialect = csv.Sniffer().sniff(fh.read())
    fh.seek(0)  # rewind: sniffing consumed the whole file
    rows = csv.reader(fh, dialect=dialect)
    print(next(rows))
    print(next(rows))
Listing 5.8: excel1.py: Reading an xlsx file with xlrd
In [9]:
import xlrd

# Build a dict from the first sheet of the workbook: column 0 (converted to
# int) is the key, column 2 is the value.
# NOTE(review): xlrd 2.0 and later appear to read only .xls files - confirm
# the installed version can still open this .xlsx sample.
iedb = {}
book = xlrd.open_workbook('samples/sampledata.xlsx')
sh = book.sheet_by_index(0)
for row_index in range(1, sh.nrows):  # start at 1 to skip the first line
    key = int(sh.cell_value(rowx=row_index, colx=0))
    iedb[key] = sh.cell_value(rowx=row_index, colx=2)
print(iedb)
Listing 5.9: excel2.py: Write an XLS file with xlwt
In [12]:
import xlwt

# Write two parallel lists as two columns of a new workbook, under a header row.
list1 = [1, 2, 3, 4, 5]
list2 = [234, 267, 281, 301, 331]
wb = xlwt.Workbook()
ws = wb.add_sheet('First sheet')
ws.write(0, 0, 'Column A')
ws.write(0, 1, 'Column B')
# Walk both lists at the same time. Row 0 holds the headers, so enumerate
# numbers the data rows from 1 instead of a hand-maintained counter.
for i, (x, y) in enumerate(zip(list1, list2), start=1):
    ws.write(i, 0, x)  # Row, Column, Data.
    ws.write(i, 1, y)
wb.save('mynewfile.xls')
Listing 5.10: picklesample.py: Basic pickle sample
In [13]:
import pickle

# Serialise a small dict to spdict.data in the current directory.
sp_dict = {
    'one': 'uno',
    'two': 'dos',
    'three': 'tres',
}
with open('spdict.data', 'wb') as fh:
    # Pickler(file).dump(obj) is the documented equivalent of pickle.dump(obj, file).
    pickle.Pickler(fh).dump(sp_dict)
In [14]:
import pickle

# Read back the object stored in spdict.data and display it.
# NOTE: unpickling can execute arbitrary code; only load files you created or trust.
with open('spdict.data', 'rb') as fh:  # the with-block closes the file after loading
    restored = pickle.load(fh)
# Expected value: {'one':'uno', 'two':'dos', 'three':'tres'}
# (kept as a comment so the cell displays the loaded object, not a literal)
restored
Out[14]: