notebook.community

Edit and run



In [1]:

    
import codecs
from chardet.universaldetector import UniversalDetector



In [2]:

    
# Input file that the cells below open, first to sniff its encoding and then to parse it.
# NOTE(review): presumably an EMME transit-lines batch file, judging by the path — confirm.
# NOTE(review): absolute, machine-specific Windows path; adjust before running elsewhere.
d221 = r'D:\TRENMO_JASP\CARRIS\emme\emme_files_jasp\d221_sc1_2009_modo_n.in'



In [3]:

    
# Sniff the text encoding of the input file with chardet's incremental detector.
# The file is read in binary mode because the detector consumes raw bytes, and
# inside a `with` block so the handle is released even when feeding stops early.
detector = UniversalDetector()

with open(d221, 'rb') as raw_file:
    for line in raw_file:
        detector.feed(line)
        # The detector flags `done` once it is confident; no need to read further.
        if detector.done:
            break

# close() finalises the guess; `result` is only meaningful after this call.
detector.close()
print(detector.result)









    



{'confidence': 0.73, 'language': '', 'encoding': 'ISO-8859-1'}



In [4]:

    
# Get file data - a new record starts on every row whose first character is 'a'.
# `data` ends up as a list of records, each record being the list of raw rows
# (the 'a' row followed by its continuation rows).
with codecs.open(d221, mode='r', encoding=detector.result['encoding']) as d221Obj:

    data = []

    lnh_array = None
    for lnh in d221Obj:
        if lnh[0] == 'a':
            # A new record begins: store the one collected so far, if any.
            if lnh_array is not None:
                data.append(lnh_array)
            lnh_array = []
        elif lnh_array is None:
            # Rows ahead of the first 'a' row belong to no record; skip them
            # instead of failing on the list that does not exist yet.
            continue

        # codecs.open already yields decoded text, so the row is stored as is.
        lnh_array.append(lnh)

    # A record is only stored when the next one starts, so the last record
    # must be stored explicitly once the file is exhausted.
    if lnh_array is not None:
        data.append(lnh_array)



In [5]:

    
# Collapse each record from a list of rows into one single string, in place.
for i, rows in enumerate(data):
    data[i] = "".join(rows)



In [6]:

    
import pandas
# One row per entry of `data`, held in a single column named 'str_value'.
df = pandas.DataFrame(data, columns=['str_value'])



In [7]:

    
# Turn the line feeds embedded in each record into spaces so a record reads as one line.
df["str_value"] = df["str_value"].str.replace("\n", " ")



In [8]:

    
# Working copy of each record: drop the first character, strip single quotes,
# and collapse every run of whitespace into one space.
df["tst"] = (
    df["str_value"].str[1:]
    .str.replace("'", "")
    .str.split()
    .str.join(" ")
)



In [9]:

    
# Remove the spaces around '->' so the arrow and its neighbours form a single token.
df["tst"] = df["tst"].str.replace(' -> ', '->')



In [10]:

    
# Split each record on either marker ('path=no ' or ' lay', with surrounding whitespace)
# and keep the second piece, i.e. the text between the first and second marker matches.
df["interest"] = df.tst.str.split(r'\s*path=no \s*|\s* lay\s*').str[1]



In [11]:

    
# Drop the space-delimited ' ttf=11 ' and ' ttf=0 ' tokens, leaving one space in their place.
df["interest"] = (
    df.interest
    .str.replace(" ttf=11 ", " ")
    .str.replace(" ttf=0 ", " ")
)



In [12]:

    
# Tokenise the itinerary text on single spaces; each row of `pre_stops` holds a list of tokens.
df["pre_stops"] = df.interest.str.split(" ")



In [13]:

    
def get_stop_codes(row):
    """Pick the tokens of ``row["pre_stops"]`` that count as stops.

    The first and the last token are always kept. For the tokens in between,
    a flag decides whether a plain token is kept:

    * ``'dwt=.50'`` and ``'dwt=>.50'`` switch the flag on,
    * ``'dwt=#.00'`` switches it off,
    * tokens starting with ``'ttf'`` are skipped and leave the flag untouched,
    * any other token is kept only while the flag is on.

    The flag starts off and stays as set until the next ``dwt`` marker.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``row["pre_stops"]``, a sequence of string tokens.

    Returns
    -------
    The same ``row`` with the selection stored under ``"real_stops"``.
    """
    tokens = row["pre_stops"]
    last_idx = len(tokens) - 1
    switch_on = ('dwt=.50', 'dwt=>.50')

    kept = []
    stopping = False
    for idx, token in enumerate(tokens):
        if idx == 0 or idx == last_idx:
            # Both ends are kept unconditionally.
            kept.append(token)
        elif token in switch_on:
            stopping = True
        elif token == 'dwt=#.00':
            stopping = False
        elif token.startswith('ttf'):
            # Attribute token: never a stop, and it does not alter the flag.
            pass
        elif stopping:
            kept.append(token)

    row["real_stops"] = kept

    return row



In [14]:

    
# Run get_stop_codes on every row (axis=1) to add the "real_stops" column.
df = df.apply(get_stop_codes, axis=1)



In [ ]:

    
from gasp.toarray import series_to_list



In [ ]:

    
# NOTE(review): series_to_list comes from the project package `gasp`; presumably it returns
# the column as a plain Python list (one list of stop codes per row) — confirm.
stops = series_to_list(df["real_stops"])



In [ ]:

    
import numpy



In [ ]:

    
# Join the per-row stop sequences end to end into one array.
# NOTE(review): assumes `stops` is a list of 1-D sequences — confirm.
nnstops = numpy.concatenate(stops, axis=0)



In [ ]:

    
# Distinct stop codes across all rows; numpy.unique returns them sorted.
final_stops = numpy.unique(nnstops)



In [ ]:

    
from gasp.toxls import df_to_xls



In [ ]:

    
# Wrap the distinct stop codes in a one-column frame named "stops".
result = pandas.DataFrame(final_stops, columns=["stops"])



In [ ]:

    
# NOTE(review): df_to_xls comes from the project package `gasp`; presumably it writes the
# frame to the given .xlsx path — confirm. The path is absolute and machine-specific.
df_to_xls(result, r'D:\TRENMO_JASP\CARRIS\emme\emme_files_jasp\stops_d221_sc1_2009_modo_n2.xlsx')



In [15]:

    
# Write new d221.in

# Leading part of each record: everything ahead of " path=no", with line feeds
# and carriage returns removed.
df["first_line"] = (
    df.str_value
    .str.split(" path=no").str[0]
    .str.replace("\n", "")
    .str.replace("\r", "")
)



In [16]:

    
# Trailing part of each record: the piece right after the first "lay=" separator,
# with the "lay=" key put back in front and carriage returns removed.
df["last_line"] = (
    "lay=" + df.str_value.str.split("lay=").str[1]
).str.replace("\r", "")



In [ ]:

    
# Inspection only: show each header from its 31st character onwards.
print(df["first_line"].str[30:])



In [17]:

    
def adjust_stop_cod(row, offset=200000):
    """Shift every stop code of a row by a fixed offset.

    Not idempotent: applying it twice to the same row adds the offset twice.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide ``row["real_stops"]``, an iterable of values accepted by ``int()``.
    offset : int, optional
        Amount added to each code; defaults to 200000.

    Returns
    -------
    The same ``row`` with ``"real_stops"`` replaced by a new list of text strings.

    Raises
    ------
    ValueError
        If a stop code cannot be parsed by ``int()``.
    """
    # u"{}".format(...) yields a text string on both Python 2 and Python 3,
    # whereas the ``unicode`` builtin only exists on Python 2.
    row["real_stops"] = [
        u"{}".format(offset + int(code)) for code in row["real_stops"]
    ]

    return row



In [18]:

    
# Run adjust_stop_cod on every row (axis=1); "real_stops" is overwritten with the shifted codes.
df = df.apply(adjust_stop_cod, axis=1)



In [ ]:

    
# NOTE(review): no visible cell creates a `real_stops2` column (adjust_stop_cod writes back to
# `real_stops`), so this inspection cell looks stale and would fail on a fresh run — confirm.
print df.real_stops2[0]



In [ ]:

    
# Inspection only: stop codes of the row labelled 0.
print(df.real_stops[0])



In [19]:

    
# Write the output file: a fixed header, then one block per record made of
# the record header, the stop codes (three per row, first row prefixed with
# "path=no") and the record's own "lay=..." tail.
with codecs.open(r'D:\TRENMO_JASP\CARRIS\emme\emme_files_jasp\d221_sul_tejo.in', 'w', encoding='utf-8') as txt:
    txt.write(u"t lines\n")
    txt.write(u"c\nc BOA VIAGEM\nc\n")

    first_lines = df.first_line.values.tolist()
    stops = df.real_stops.tolist()
    lay = df.last_line.tolist()

    n_lines = len(first_lines)
    for l in range(n_lines):
        # Chunk the stop codes of this record into rows of at most three.
        l_stops = [stops[l][k:k+3] for k in range(0, len(stops[l]), 3)]
        towrite = u"{}\n path=no {}\n {}{}{}{}".format(
            first_lines[l], " ".join(l_stops[0]),
            "\n ".join([" ".join(s) for s in l_stops[1:]]),
            "\n " if len(l_stops[1:]) else "",
            # Index by the record number `l`; the chunk index of the
            # comprehension above would pick another record's tail.
            lay[l],
            # No trailing newline after the very last record.
            "" if l + 1 == n_lines else "\n"
        )

        txt.write(towrite)

    # No explicit close(): the `with` block closes the file on exit.



In [ ]: