This Python script converts genotype data from the Structure format to the fastStructure format. Run it from the command line and pass your .str file as the only argument; the converted data is written next to the input, to a file named after the input with 'fast.str' appended. Structure-format data can be obtained with the software Convert (see http://onlinelibrary.wiley.com/doi/10.1111/j.1471-8286.2004.00597.x/abstract).
In [ ]:
#! /usr/bin/env python3
"""Convert a Structure-format genotype file to the fastStructure format.

Usage (command line)::

    python <this_script>.py input.str

The first line of the input (the loci names) is dropped.  Every other
non-blank line is split on whitespace; its first two fields are kept,
four filler columns (``1``, ``0``, ``0``, ``extraCol``) are inserted
after them, and the remaining fields are appended separated by single
spaces.  The result is written to ``<input path>fast.str``.
"""
import sys

# Filler inserted between the two leading columns and the genotype columns.
# NOTE(review): presumably pads the row to the six leading columns that
# fastStructure skips -- confirm against the fastStructure documentation.
FILLER_COLUMNS = '\t1\t0\t0\textraCol\t'

# Appended to the input path to build the output path.
OUTPUT_SUFFIX = 'fast.str'


def convert_line(line):
    """Return one input row rewritten in the output layout.

    Args:
        line: A raw line of the input file, with or without its newline.

    Returns:
        The converted row without a trailing newline, or ``None`` when the
        line contains only whitespace.

    Raises:
        ValueError: If the line has fewer than two whitespace-separated
            fields, so the two leading columns cannot be extracted.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 2:
        raise ValueError(
            'expected at least 2 columns, got: {!r}'.format(line))
    # Genotype columns are joined with single spaces, the leading columns
    # with tabs; this mixed layout is kept as-is for compatibility.
    genotypes = ' '.join(fields[2:])
    return fields[0] + '\t' + fields[1] + FILLER_COLUMNS + genotypes


def convert_lines(lines):
    """Yield the output text for each data row of *lines*.

    The first item of *lines* (the loci-name header) is skipped, and blank
    lines are ignored instead of aborting the conversion.

    Args:
        lines: An iterable of raw input lines (e.g. an open text file).

    Yields:
        Converted rows.  A row ends with a newline exactly when the input
        line it came from did, so only an unterminated final input line
        produces an unterminated output row.

    Raises:
        ValueError: Propagated from ``convert_line`` for malformed rows.
    """
    rows = iter(lines)
    next(rows, None)  # Gets rid of loci names
    for raw in rows:
        converted = convert_line(raw)
        if converted is None:
            continue
        # Decide on the terminator from the line itself rather than by
        # comparing its content with the last line: rows with identical
        # content must not lose their newline.
        yield converted + ('\n' if raw.endswith('\n') else '')


def convert_file(in_path, out_path):
    """Read the file at *in_path* and write its conversion to *out_path*.

    Both files are closed even if the conversion fails part-way.
    """
    with open(in_path, 'r') as infile, open(out_path, 'w') as outfile:
        outfile.writelines(convert_lines(infile))


def main(argv):
    """Command-line entry point; ``argv[1]`` is the input file path."""
    if len(argv) < 2:
        sys.exit('usage: {} INPUT.str'.format(argv[0] if argv else 'script'))
    in_path = argv[1]
    convert_file(in_path, in_path + OUTPUT_SUFFIX)


if __name__ == '__main__':
    main(sys.argv)