In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.color import lab2rgb
from sklearn import model_selection
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
import sys
from sklearn.metrics import accuracy_score
from skimage import color
from sklearn import pipeline
from sklearn import preprocessing
from sklearn import svm
from sklearn.preprocessing import StandardScaler

In [7]:
# Input/output paths: labelled training data, unlabelled data to classify,
# and the file the predicted labels are written to by a later cell.
filename1 = "monthly-data-labelled.csv"
filename2 = "monthly-data-unlabelled.csv"
filename3 = "labels.csv"
data = pd.read_csv(filename1)
unlabelled_data = pd.read_csv(filename2)

# Every column except the target ("city") and "year" is used as a feature.
# list.remove raises ValueError if either column is missing from the CSV.
training_columns = data.columns.tolist()
training_columns.remove("city")
training_columns.remove("year")

# Fixed seed so the train/test split -- and therefore the accuracy and the
# misclassification table printed below -- is the same on every run.
# NOTE: any output recorded from an earlier, unseeded run will not match.
RANDOM_SEED = 42
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data[training_columns].values,
    data["city"].values,
    random_state=RANDOM_SEED,
)

# Standardise the features, then fit a linear-kernel SVM on them.
svc_model = pipeline.make_pipeline(
    StandardScaler(),
    svm.SVC(C=1.0, kernel="linear", decision_function_shape="ovr"),
)
svc_model.fit(X_train, y_train)

# Predict once on the held-out split and reuse the result for both the
# accuracy score and the table of misclassified rows.
Y_predicted_svc = svc_model.predict(X_test)
print(accuracy_score(y_test, Y_predicted_svc))

df = pd.DataFrame({'truth': y_test, 'prediction': Y_predicted_svc})
print(df[df['truth'] != df['prediction']])


0.806896551724
         prediction      truth
2           Seattle   Victoria
6     Atlantic City    Chicago
17           Regina   Winnipeg
18         Portland    Seattle
22         Edmonton   Winnipeg
24         Edmonton   Winnipeg
25         Edmonton    Calgary
27        Vancouver   Victoria
41           London    Chicago
49         Montreal     Ottawa
59           Regina   Winnipeg
60        Saskatoon   Winnipeg
65          Seattle  Vancouver
67           Regina   Winnipeg
73         Victoria  Vancouver
84          Seattle   Portland
87        Saskatoon     Regina
90           Regina   Winnipeg
93         Victoria    Seattle
95           London     Ottawa
98           London     Ottawa
99          Seattle  Vancouver
104         Toronto     London
106       Vancouver   Victoria
109        Victoria  Vancouver
112         Seattle   Victoria
114        Edmonton    Calgary
117          London    Toronto
119          Regina   Edmonton
120         Seattle   Portland
128       Saskatoon     Regina
130          Gander     Québec
131         Chicago    Toronto
143       Vancouver    Seattle
154   Atlantic City    Chicago
161         Chicago     Ottawa
172        Montreal     Québec
174         Seattle   Victoria
175         Seattle  Vancouver
185         Toronto    Chicago
202        Portland    Seattle
204       Saskatoon   Winnipeg
208  Raleigh Durham    Atlanta
211          Ottawa   Montreal
212        Winnipeg     Regina
213          London    Toronto
217        Montreal     Ottawa
224         Toronto     London
226         Chicago     London
237          Regina   Winnipeg
238          Regina   Winnipeg
246       Saskatoon     Regina
258       Saskatoon     Regina
262  Raleigh Durham    Atlanta
265          Regina   Winnipeg
282          London    Toronto

In [5]:
# Classify the unlabelled rows using the same feature columns the model was
# trained on.
predictions = svc_model.predict(unlabelled_data[training_columns].values)

# Write one predicted label per line. header=False is passed explicitly:
# on pandas >= 0.24 Series.to_csv defaults to header=True, which would put a
# spurious "0" line at the top of the labels file.
pd.Series(predictions).to_csv(filename3, index=False, header=False)