In [12]:
# Imported here so this cell also runs on a fresh kernel; csv.reader is
# called below and nothing earlier in the notebook brings it into scope.
import csv
import re


def clean(s):
    """Return the runs of word characters in ``s``, joined by single spaces and lower-cased.

    NOTE(review): combining re.UNICODE with re.LOCALE is accepted by Python 2
    but rejected by Python 3's ``re`` module -- revisit the flags if this
    notebook is ever ported.
    """
    return " ".join(re.findall(r'\w+', s, flags=re.UNICODE | re.LOCALE)).lower()


# Peek at the first two rows of the training CSV to check the column layout.
with open("train_titanic.csv", "r") as infile:
    for row_number, row in enumerate(csv.reader(infile), 1):
        # Single-argument print(...) prints the same thing as `print row`.
        print(row)
        if row_number == 2:
            break
In [13]:
def clean(s):
    """Lower-case the word-character runs found in ``s`` and join them with single spaces."""
    words = re.findall(r'\w+', s, flags=re.UNICODE | re.LOCALE)
    return " ".join(words).lower()


# Print the first two rows of the test CSV, then stop reading.
with open("test_titanic.csv", "r") as infile:
    rows_shown = 0
    for row in csv.reader(infile):
        print(row)
        rows_shown += 1
        if rows_shown == 2:
            break
In [14]:
import csv
import re
def clean(s):
    """Return the runs of word characters in ``s``, joined by single spaces and lower-cased.

    NOTE(review): not called by the conversion below; kept so the name stays
    defined for any other cell that may rely on it.
    """
    return " ".join(re.findall(r'\w+', s, flags=re.UNICODE | re.LOCALE)).lower()


# One entry per feature column, in CSV column order: (prefix, is_numeric).
# Numeric features are written as "name:value"; categorical ones become a
# single token "prefix" + value.
VW_FEATURES = [
    ("passenger_class_", False),
    ("sex_", False),
    ("age:", True),
    ("sibsp:", True),
    ("parch:", True),
    ("fare:", True),
    ("embarked_", False),
    ("last_name_", False),
    ("title_", False),
    ("fare2_", False),
    ("familysize:", True),
    ("farepp:", True),
    ("deck_", False),
    ("side_", False),
]

# Characters that act as delimiters in VW's text input format.
_VW_DELIMITERS = re.compile(r'[\s|:]')


def _vw_token(value):
    """Replace whitespace, ``|`` and ``:`` in ``value`` with ``_`` so it stays one VW token."""
    return _VW_DELIMITERS.sub("_", str(value))


def _vw_line(row, label, first_feature_col):
    """Format one CSV row as ``<label> '<row[0]> |f <features>`` (no trailing newline).

    ``row[first_feature_col]`` onwards must hold the columns described by
    VW_FEATURES; a shorter row raises IndexError, as the inline code did.
    """
    features = []
    for offset, (prefix, is_numeric) in enumerate(VW_FEATURES):
        value = str(row[first_feature_col + offset])
        if is_numeric:
            value = value.strip()
            if not value:
                # An empty cell would leave a dangling "name:" token, so the
                # feature is omitted from this example instead.
                continue
            features.append(prefix + value)
        else:
            # A raw value containing a space, "|" or ":" would be split into
            # several tokens, so those characters are replaced.
            features.append(prefix + _vw_token(value))
    return label + " '" + str(row[0]) + " |f " + " ".join(features)


def csv_to_vw(csv_path, vw_path, label_col=None):
    """Convert a feature CSV into a VW input file.

    The first CSV row is skipped as a header. Column 0 is written as the
    example tag.

    csv_path  -- path of the CSV file to read.
    vw_path   -- path of the VW file to write (overwritten).
    label_col -- index of the outcome column, or None. When given, rows whose
                 value there equals "1" get label 1 and all others -1, and
                 features start in the following column. When None, every
                 example gets the placeholder label 1 and features start at
                 column 1.
    """
    first_feature_col = 1 if label_col is None else label_col + 1
    # NOTE(review): writing str to a "wb" file is Python 2 usage; open with
    # "w" instead if this notebook is ported to Python 3.
    with open(csv_path, "r") as infile, open(vw_path, "wb") as outfile:
        reader = csv.reader(infile)
        next(reader, None)  # skip the header row; tolerate an empty file
        for row in reader:
            if label_col is None:
                label = "1"
            else:
                label = "1" if str(row[label_col]) == "1" else "-1"
            outfile.write(_vw_line(row, label, first_feature_col) + "\n")


csv_to_vw("train_titanic.csv", "train_titanic.vw", label_col=1)
csv_to_vw("test_titanic.csv", "test_titanic.vw")
vw train_titanic.vw -f model.vw --binary --passes 20 -c -q ff --adaptive --normalized --l1 0.00000001 --l2 0.0000001 -b 24
vw -d test_titanic.vw -t -i model.vw -p preds_titanic.txt
In [19]:
import csv
# Turn the prediction file into a two-column submission CSV.
# Each prediction line is split on single spaces: the second field is written
# as PassengerId, and the first field decides Survived (1 when it truncates
# to the integer 1, otherwise 0).
with open("preds_titanic.txt", "r") as infile, open("submission_vw.csv", "wb") as outfile:
    outfile.write("PassengerId,Survived\n")
    for raw in infile.readlines():
        fields = raw.split(" ")
        passenger_id = str(fields[1]).replace("\n", "")
        survived = "1" if int(float(fields[0])) == 1 else "0"
        outfile.write(passenger_id + "," + survived + "\n")
In [18]:
# Inspect the first six prediction lines: for each one print the second
# space-separated field, then the first field as a float and as a truncated
# int, followed by a blank line.
with open("preds_titanic.txt", "r") as infile:
    for shown, raw in enumerate(infile.readlines(), 1):
        fields = raw.split(" ")
        print(str(fields[1]).replace("\n", ""))
        print(str(float(fields[0])))
        print(str(int(float(fields[0]))))
        print("")  # same output as a bare `print` statement
        if shown > 5:
            break
In [ ]: