This Python script converts genotype data from the Structure format to the fastStructure data format. Run it from the command line, passing your .str file to the .py script as its only argument; the converted data is written to a new file whose name is the input file name with 'fast.str' appended. Input in the Structure data format can be produced with the software Convert (see http://onlinelibrary.wiley.com/doi/10.1111/j.1471-8286.2004.00597.x/abstract)


In [ ]:
#! /usr/bin/env python3

# Usage: pass your .str file to this script as the first command-line argument.
# The converted data is written to '<input path>fast.str'.
import sys

# Constant columns written between the population column and the genotypes.
# NOTE(review): presumably padding so the genotypes start in the 7th column,
# which is where fastStructure is documented to start reading -- confirm.
_FILLER = '\t1\t0\t0\textraCol\t'


def convert_structure_lines(lines):
    """Convert the lines of a Structure-format file to fastStructure text.

    ``lines`` is the list returned by ``readlines()`` on the input file.
    The first line (loci names) is dropped.  Every remaining non-blank line
    is split on whitespace into an individual label, a population label and
    the genotype values, and rewritten as::

        label <TAB> population <TAB> 1 <TAB> 0 <TAB> 0 <TAB> extraCol <TAB> genotypes

    with the genotype values separated by single spaces.

    Rows are joined with newlines.  The result ends with a newline only if
    the last input line did, so the output mirrors the input in that respect.

    Raises:
        ValueError: if a non-blank data line has fewer than two fields.
    """
    rows = []
    # start=2: the header is line 1, so the first data row is line 2.
    for line_number, raw_line in enumerate(lines[1:], start=2):
        fields = raw_line.split()
        if not fields:
            # Blank or whitespace-only line (e.g. trailing empty lines).
            continue
        if len(fields) < 2:
            raise ValueError(
                'line {}: expected at least an individual and a population '
                'column, got {!r}'.format(line_number, raw_line.rstrip('\n'))
            )
        label, population = fields[0], fields[1]
        genotypes = ' '.join(fields[2:])
        rows.append(label + '\t' + population + _FILLER + genotypes)

    converted = '\n'.join(rows)
    # Decide on the final newline by position, never by comparing row text:
    # identical rows (a fully homozygous individual) are legitimate data.
    if rows and lines[-1].endswith('\n'):
        converted += '\n'
    return converted


def main(argv):
    """Read the file named by ``argv[1]`` and write '<argv[1]>fast.str'."""
    if len(argv) < 2:
        sys.exit('usage: {} <input.str>'.format(argv[0] if argv else 'script'))

    in_path = argv[1]
    out_path = in_path + 'fast.str'

    # Context managers guarantee both files are closed even on error.
    with open(in_path, 'r') as infile:
        lines = infile.readlines()

    converted = convert_structure_lines(lines)

    with open(out_path, 'w') as outfile:
        outfile.write(converted)


if __name__ == '__main__':
    main(sys.argv)