In [1]:
import codecs
from chardet.universaldetector import UniversalDetector
In [2]:
# Path of the input file read by the cells below.
# NOTE(review): absolute, machine-specific Windows path - adjust it before running elsewhere.
d221 = r'D:\TRENMO_JASP\CARRIS\emme\emme_files_jasp\d221_sc1_2009_modo_n.in'
In [3]:
# Guess the text encoding of the input file with chardet's incremental detector.
detector = UniversalDetector()
# Feed raw bytes (chardet inspects undecoded data) and let the context manager
# close the handle; the original opened the file in text mode and never closed it.
with open(d221, 'rb') as raw_file:
    for line in raw_file:
        detector.feed(line)
        # Stop reading as soon as the detector is confident enough.
        if detector.done:
            break
detector.close()
# Parenthesised single-argument form prints the same thing on Python 2 and 3.
print(detector.result)
In [4]:
# Read the file and group its physical lines into records.
# A record starts at every line whose first character is 'a'; the lines that
# follow (until the next 'a' line) belong to the same record.
with codecs.open(d221, mode='r', encoding=detector.result['encoding']) as d221Obj:
    data = []
    lnh_array = None
    for lnh in d221Obj:
        if lnh.startswith('a'):
            # Close the record collected so far before starting a new one.
            if lnh_array is not None:
                data.append(lnh_array)
            lnh_array = []
        elif lnh_array is None:
            # Lines ahead of the first 'a' line belong to no record; the
            # original crashed here with None.append.
            continue
        # codecs.open already yields decoded text when an encoding is given,
        # so the former encode/decode round trip is not needed.
        lnh_array.append(lnh)
    # Flush the last record: the original only appended a record when the
    # next 'a' line showed up, so the final one was silently dropped.
    if lnh_array is not None:
        data.append(lnh_array)
In [5]:
# Collapse every record (a list of physical lines) into one string, in place.
for i, record_lines in enumerate(data):
    data[i] = "".join(record_lines)
In [6]:
import pandas
# One row per record string collected in `data`, held in a single 'str_value' column.
df = pandas.DataFrame(data, columns=['str_value'])
In [7]:
# Turn the newlines inside each record into spaces so a record reads as one line.
# Only "\n" is replaced here; any "\r" characters are left in place.
df["str_value"] = df["str_value"].str.replace("\n", " ")
In [8]:
# Working copy of each record: drop the first character, remove single
# quotes, then collapse every run of whitespace into one space.
df["tst"] = (
    df["str_value"]
    .str[1:]
    .str.replace("'", "")
    .str.split()
    .str.join(" ")
)
In [9]:
# Remove the spaces around '->' so it is not split into a separate token later.
df["tst"] = df["tst"].str.replace(' -> ', '->')
In [10]:
# Split each record on 'path=no ' or ' lay' (with any surrounding whitespace)
# and keep the second piece.
# NOTE(review): assumes records look like '<header> path=no <itinerary> lay=...',
# so that piece is the itinerary; records without a delimiter presumably yield NaN - confirm.
df["interest"] = df.tst.str.split(r'\s*path=no \s*|\s* lay\s*').str[1]
In [11]:
# Drop the ' ttf=11 ' and ' ttf=0 ' tokens, leaving a single space in their place.
for ttf_token in (" ttf=11 ", " ttf=0 "):
    df["interest"] = df.interest.str.replace(ttf_token, " ")
In [12]:
# Tokenise the itinerary text on single spaces; each row gets a list of tokens.
df["pre_stops"] = df.interest.str.split(" ")
In [13]:
def get_stop_codes(row):
    """Pick the stop tokens out of ``row["pre_stops"]``.

    The first and the last token are always kept. In between, the tokens
    'dwt=.50' and 'dwt=>.50' switch collecting on, 'dwt=#.00' switches it
    off, and tokens starting with 'ttf' are skipped without touching the
    switch. Any other token is kept only while collecting is on (it starts
    off).

    The selected tokens are stored in ``row["real_stops"]`` and the same
    ``row`` object is returned.
    """
    tokens = row["pre_stops"]
    last_index = len(tokens) - 1
    kept = []
    collecting = False
    for position, token in enumerate(tokens):
        if position == 0 or position == last_index:
            # Both ends of the itinerary are kept whatever they contain.
            kept.append(token)
        elif token in ('dwt=.50', 'dwt=>.50'):
            collecting = True
        elif token == 'dwt=#.00':
            collecting = False
        elif not token.startswith('ttf') and collecting:
            kept.append(token)
    row["real_stops"] = kept
    return row
In [14]:
# Run get_stop_codes on every row; each row gains a 'real_stops' entry.
df = df.apply(get_stop_codes, axis=1)
In [ ]:
from gasp.toarray import series_to_list
In [ ]:
# Hand the 'real_stops' column to the project helper series_to_list.
# NOTE(review): presumably returns a plain list with one stop list per row - confirm against gasp.toarray.
stops = series_to_list(df["real_stops"])
In [ ]:
import numpy
In [ ]:
# Flatten the per-row stop lists into one array by joining them end to end.
nnstops = numpy.concatenate(stops, axis=0)
In [ ]:
# Keep each stop code once (numpy.unique also returns them sorted).
final_stops = numpy.unique(nnstops)
In [ ]:
from gasp.toxls import df_to_xls
In [ ]:
# Wrap the unique stop codes in a one-column frame for export.
result = pandas.DataFrame(final_stops, columns=["stops"])
In [ ]:
# Pass the stop table and the target .xlsx path to the project helper df_to_xls.
# NOTE(review): absolute, machine-specific output path.
df_to_xls(result, r'D:\TRENMO_JASP\CARRIS\emme\emme_files_jasp\stops_d221_sc1_2009_modo_n2.xlsx')
In [15]:
# Write new d221.in
# Part 1: the text of each record ahead of ' path=no', without line-break characters.
df["first_line"] = (
    df.str_value
    .str.split(" path=no").str[0]
    .str.replace("\n", "")
    .str.replace("\r", "")
)
In [16]:
# Tail of each record: 'lay=' followed by the text after the first 'lay=',
# with carriage returns removed.
df["last_line"] = ("lay=" + df.str_value.str.split("lay=").str[1]).str.replace("\r", "")
In [ ]:
# Inspect the first-line text from character 30 onwards.
print(df["first_line"].str[30:])
In [17]:
def adjust_stop_cod(row, offset=200000):
    """Shift every stop code in ``row["real_stops"]`` by ``offset``.

    Each code is parsed with ``int``, increased by ``offset`` and stored
    back as a unicode string. ``row["real_stops"]`` is replaced with the
    new list and the same ``row`` object is returned.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row) with a "real_stops" sequence
        of values accepted by ``int``.
    offset : int, optional
        Amount added to every code; defaults to the 200000 that used to be
        hard-coded.

    Raises
    ------
    ValueError
        If a code cannot be parsed as an integer.

    Note: the result overwrites the input column, so applying this twice
    shifts the codes twice.
    """
    # u"{}".format gives the same unicode text as unicode(str(n), 'utf-8')
    # on Python 2 and also runs unchanged on Python 3.
    row["real_stops"] = [u"{}".format(offset + int(code)) for code in row["real_stops"]]
    return row
In [18]:
# Rewrite the stop codes of every row with adjust_stop_cod.
df = df.apply(adjust_stop_cod, axis=1)
In [ ]:
# NOTE(review): no cell visible in this notebook creates a `real_stops2` column
# (adjust_stop_cod writes back to `real_stops`), so this lookup presumably fails
# on a fresh run - confirm, then remove it or point it at `real_stops`.
print df.real_stops2[0]
In [ ]:
# Inspect the stop list of the row labelled 0.
print(df.real_stops[0])
In [19]:
# Write the new file: a fixed header, then for every row of df its first
# line, a ' path=no' itinerary with the stop codes laid out three per
# physical line, and finally its 'lay=' tail.
# NOTE(review): absolute, machine-specific output path.
with codecs.open(r'D:\TRENMO_JASP\CARRIS\emme\emme_files_jasp\d221_sul_tejo.in', 'w', encoding='utf-8') as txt:
    txt.write(u"t lines\n")
    txt.write(u"c\nc BOA VIAGEM\nc\n")
    first_lines = df.first_line.values.tolist()
    stops = df.real_stops.tolist()
    lay = df.last_line.tolist()
    for l in range(len(first_lines)):
        # Break this row's stop codes into chunks of at most three.
        l_stops = [stops[l][k:k + 3] for k in range(0, len(stops[l]), 3)]
        towrite = u"{}\n path=no {}\n {}{}{}{}".format(
            first_lines[l], " ".join(l_stops[0]),
            "\n ".join([" ".join(s) for s in l_stops[1:]]),
            # Extra line break only when continuation chunks were written.
            "\n " if len(l_stops[1:]) else "",
            # Index with the row counter: the original used `lay[i]`, i.e. the
            # variable leaked by the chunking comprehension, and so wrote the
            # tail of an unrelated row (or ran past the end of the list).
            lay[l],
            # No trailing newline after the very last entry.
            "" if l + 1 == len(first_lines) else "\n"
        )
        txt.write(towrite)
    # The with-block closes the file; the explicit txt.close() was redundant.
In [ ]: