File handling

Input/Output


In [ ]:
# input() prints the prompt, waits for one line on standard input
# and hands it back as a string (without the trailing line break)
answer = input("enter your name:")
# sep='' glues the three pieces together without extra spaces
print('Hello ', answer, '!', sep='')

Simple file operations

writing to files

In [ ]:
# files are opened with the open function, which creates a file object
# important methods of file objects: read, write, readlines, writelines
# (dir(f) lists all of them)
#
# the `with` block closes the file automatically when it is left,
# even if write() raises an error, so no explicit f.close() is needed
with open('new_file.txt', 'w') as f:  # attention: 'w' overwrites an existing file
    f.write("Hallo Welt!")

In [ ]:
# writelines writes every string of a list to the file; it does not add
# line breaks itself, so each entry has to end with '\n'
lines = ["Number: " + str(i) + '\n' for i in range(12)]
print(lines)

# mode 'a' appends to the end of the file instead of overwriting it;
# the `with` block closes the file even if writelines() raises
with open('new_file.txt', 'a') as f:
    f.writelines(lines)
reading from files

In [ ]:
# a `with` block is the recommended way to work with files in python:
# the file is closed automatically as soon as the block is left
with open('new_file.txt', mode='r') as f:  # mode 'r' = read only
    # read() returns the complete file content as one single string
    content = f.read()

print(content)

In [ ]:
# readlines() returns a list with one string per line;
# the line breaks ('\n') stay part of the strings
with open('new_file.txt', mode='r') as f:  # mode 'r' = read only
    lines = f.readlines()

print(lines)

Organizing files in folders


In [ ]:
# find out where we are in the file system
import os

# os.getcwd() returns the current working directory as a string
work_path = os.getcwd()
print(work_path)

In [ ]:
# define path for data files (work_path comes from the previous cell)
data_path = os.path.join(work_path, 'data/')

# create the folder if it is missing; with exist_ok=True nothing happens
# when the folder is already there, so no separate os.path.exists() check
# (which could be outdated by the time mkdir runs) is needed
os.makedirs(data_path, exist_ok=True)

# save a file to that folder; os.path.join takes care of the path separator,
# and the `with` block closes the file even if write() raises
with open(os.path.join(data_path, 'new_file.txt'), 'w') as f:
    f.write("Hallo Welt! (In a folder!!)")

Parsing data

Example 1: FASTA files


In [ ]:
with open('example.fasta', 'r') as f:
    lines = f.read()  # note: the whole file as ONE string, not a list of lines

geneinfo = {}

# every fasta record starts with '>'; splitting there gives one chunk per
# record plus an empty chunk in front of the first '>', which is dropped
# (an empty file therefore simply yields no entries)
entries = [chunk for chunk in lines.split('>') if chunk.strip()]

for entry in entries:
    # the first line is the header, everything after it is the sequence;
    # partition (unlike split) also copes with a header without sequence
    info, _newline, seq = entry.partition('\n')

    # header layout expected here: "<name> <gene> <ID> <location>"
    fields = info.split()  # no argument: split on any run of whitespace
    if len(fields) != 4:
        raise ValueError(
            "unexpected fasta header (need 4 fields): {!r}".format(info))
    name, gene, gene_id, location = fields

    # arrange all in a dictionary, one entry per record name
    geneinfo[name] = {'gene': gene,
                      'ID': gene_id,
                      'location': location,
                      # join the sequence lines into one continuous string
                      'sequence': ''.join(seq.split())}

# show the first record (None if the file contained no records)
print(next(iter(geneinfo.values()), None))

Example 2: Array-like data


In [ ]:
with open('data.txt', 'r') as f:
    lines = f.readlines()

data = {}

# every data line is expected to look like "key: value" with a numeric value
for line_number, line in enumerate(lines, start=1):
    text = line.strip()  # strip removes leading and trailing whitespace
    if not text or text.startswith('#'):  # skip blank lines and comments
        continue
    # rpartition splits at the LAST ':' - a number never contains a colon,
    # so this also works when the key itself contains one
    key, separator, value = text.rpartition(':')
    if not separator:
        raise ValueError(
            "line {}: expected 'key: value', got {!r}".format(line_number, text))
    data[key.strip()] = float(value)  # float() ignores surrounding spaces
print(data)