In [2]:
from __future__ import print_function
from path import path
import csv, json

In [11]:
# Inputs: previously generated sample files that the cells below convert.
sample_nprofile_output_file = 'webroot/uploads/library/NC_007760.80524f06a4021199748d8779c0657200b69e1632.nprofile'
sample_extract_output_file = 'webroot/uploads/library/NC_007760.fcc471be757b9fcf724b04ec01e7caed5484e878.genes'

# Outputs: everything is written beneath `outdir`; mkdir_p() is called up
# front so the directory is in place before any file is written into it.
outdir = path('npactweb/npactweb/static/drawingtest/')
outdir.mkdir_p()
sample_nprofile_csv_file = outdir / 'nprofile.csv'
sample_extract_csv_file = outdir / 'extract.csv'
sample_nprofile_json_file = outdir / 'nprofile.json'
sample_extract_json_file = outdir / 'extract.json'

In [12]:
def write_csv(ofile, data, header=None):
    """Write rows of values as comma-separated lines.

    Parameters
    ----------
    ofile : file-like object or path
        Anything with a ``write`` attribute is written to directly and is
        left open for the caller to manage. Anything else is treated as a
        filename, opened in text mode, and closed again before returning.
    data : iterable of iterables
        One inner iterable per output line; every value is passed through
        ``str()`` before being joined.
    header : sequence of str, optional
        Column names written as the first line when given and non-empty.

    Notes
    -----
    Fields are joined by hand with ',' and terminated with '\\n'. No quoting
    or escaping is performed, so values must not contain commas or newlines.
    """
    if not hasattr(ofile, 'write'):
        # We own this handle, so make sure it is flushed and closed even if
        # writing a row raises part-way through.
        with open(ofile, 'w') as fh:
            write_csv(fh, data, header=header)
        return

    if header:
        ofile.write(','.join(header) + '\n')
    for row in data:
        ofile.write(','.join(map(str, row)) + '\n')

def write_json(ofile, data, keys):
    """Serialize rows as a JSON array of objects.

    Parameters
    ----------
    ofile : file-like object or path
        Anything with a ``write`` attribute is written to directly and is
        left open for the caller to manage. Anything else is treated as a
        filename, opened in text mode, and closed again before returning.
    data : iterable of iterables
        One inner iterable per record.
    keys : sequence
        Names paired positionally with each record's values via ``zip``, so
        surplus keys or surplus values are silently dropped.
    """
    records = [dict(zip(keys, row)) for row in data]
    if hasattr(ofile, 'write'):
        json.dump(records, ofile)
    else:
        # We own this handle, so close it deterministically instead of
        # leaving that to garbage collection.
        with open(ofile, 'w') as fh:
            json.dump(records, fh)

In [13]:
def parse_nprofile(ifile):
    """Read a whitespace-delimited nprofile file into a list of 4-tuples.

    Every line must split into exactly four fields; the first is converted
    with ``int`` and the remaining three with ``float``. A line with any
    other number of fields, or a field that does not convert, raises
    ``ValueError``.

    Returns a list of ``(int, float, float, float)`` tuples in file order.
    """
    records = []
    for line in path(ifile).lines(retain=False):
        # Tuple unpacking doubles as the field-count check.
        coordinate, first, second, third = line.split()
        records.append((int(coordinate), float(first), float(second), float(third)))
    return records

# Parse the sample nprofile output once, then export it as CSV and JSON
# using the same column names for both.
nprofile_columns = ['coordinate', 'r', 'g', 'b']
nprofile_data = parse_nprofile(sample_nprofile_output_file)
write_csv(sample_nprofile_csv_file, nprofile_data, header=nprofile_columns)
write_json(sample_nprofile_json_file, nprofile_data, nprofile_columns)

In [14]:
import re
def parse_extract(ifile):
    """Yield ``(name, begin, end, complement)`` for each line of a genes file.

    Each line must consist of exactly two whitespace-separated fields: a
    name and a coordinate spec. The spec is either ``BEGIN..END`` or that
    same range wrapped as ``complement(BEGIN..END)``.

    Yields
    ------
    tuple
        ``(name, int(begin), int(end), complement)`` where ``complement`` is
        1 when the spec started with ``complement`` and 0 otherwise.

    Raises
    ------
    ValueError
        If a line does not have exactly two fields, the spec does not split
        into exactly two parts on ``'..'``, or a bound is not an integer.
    """
    # Only the length is used: it is the number of characters to strip from
    # the front of a wrapped spec; the closing ')' is stripped from the end.
    wrapper_open = 'complement('
    for line in path(ifile).lines(retain=False):
        # Call split() on the line itself (not str.split(line)) so that any
        # string type returned by lines() is accepted.
        name, coords = line.split()
        complement = 0
        if coords.startswith('complement'):
            coords = coords[len(wrapper_open):-1]
            complement = 1
        begin, end = coords.split('..')
        yield name, int(begin), int(end), complement

# Materialize the parsed records once (parse_extract is a generator), then
# export them as CSV and JSON using the same column names for both.
extract_columns = ['name', 'start', 'end', 'complement']
extract_data = list(parse_extract(sample_extract_output_file))
write_csv(sample_extract_csv_file, extract_data, header=extract_columns)
write_json(sample_extract_json_file, extract_data, extract_columns)

In [15]:
# Show the first ten lines of the generated nprofile CSV.
for preview_line in path(sample_nprofile_csv_file).lines(retain=False)[0:10]:
    print(preview_line)


coordinate,r,g,b
101,73.1,49.3,89.6
152,70.1,41.8,95.5
203,73.1,41.8,98.5
254,77.6,52.2,98.5
305,77.6,56.7,98.5
356,77.6,62.7,100.0
407,74.6,61.2,100.0
458,73.1,50.7,100.0
509,68.7,49.3,100.0

In [16]:
# Show the first ten lines of the generated extract CSV.
for preview_line in path(sample_extract_csv_file).lines(retain=False)[0:10]:
    print(preview_line)


name,start,end,complement
Adeh_0001,22,1395,0
Adeh_0002,1811,2938,0
Adeh_0003,2958,4076,0
Adeh_0004,4264,6651,0
Adeh_0005,6733,7728,0
Adeh_0006,7938,8459,0
Adeh_0007,8483,9259,0
Adeh_0008,9256,11391,1
Adeh_0009,11509,12621,1

In [17]:
# Pretty-print the first four records of the generated nprofile JSON; the
# file is opened in a `with` block so the handle is closed after loading.
with open(sample_nprofile_json_file) as nprofile_json:
    print(json.dumps(json.load(nprofile_json)[0:4], indent=3))


[
   {
      "coordinate": 101, 
      "r": 73.1, 
      "b": 89.6, 
      "g": 49.3
   }, 
   {
      "coordinate": 152, 
      "r": 70.1, 
      "b": 95.5, 
      "g": 41.8
   }, 
   {
      "coordinate": 203, 
      "r": 73.1, 
      "b": 98.5, 
      "g": 41.8
   }, 
   {
      "coordinate": 254, 
      "r": 77.6, 
      "b": 98.5, 
      "g": 52.2
   }
]

In [18]:
# Pretty-print the first four records of the generated extract JSON; the
# file is opened in a `with` block so the handle is closed after loading.
with open(sample_extract_json_file) as extract_json:
    print(json.dumps(json.load(extract_json)[0:4], indent=3))


[
   {
      "start": 22, 
      "complement": 0, 
      "end": 1395, 
      "name": "Adeh_0001"
   }, 
   {
      "start": 1811, 
      "complement": 0, 
      "end": 2938, 
      "name": "Adeh_0002"
   }, 
   {
      "start": 2958, 
      "complement": 0, 
      "end": 4076, 
      "name": "Adeh_0003"
   }, 
   {
      "start": 4264, 
      "complement": 0, 
      "end": 6651, 
      "name": "Adeh_0004"
   }
]

In [ ]: