In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.datasets import samples_generator
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.pipeline import FeatureUnion
from sklearn.cross_validation import StratifiedKFold, permutation_test_score

In [4]:
from utils.sax import SAX
from utils.SAXTransformer import SAXTransformer

In [16]:
# Load the training set. Every non-blank line of the file is one
# whitespace-separated record: the first field is the class label and the
# remaining fields are the feature values.
#
# np.loadtxt replaces the hand-rolled parser: it skips blank lines (the manual
# loop raised IndexError on them at `instance[0]`) and raises on rows of
# unequal length rather than silently building a ragged array.
# ndmin=2 keeps the result two-dimensional even when the file holds a single
# record, so the column slicing below always works.
data = np.loadtxt('data/synthetic_control_TRAIN.txt', ndmin=2)

# Copy the slices so X and y are independent, contiguous arrays (as the
# original list-built arrays were) instead of views into `data`.
X = data[:, 1:].copy()  # features: everything after the label column
y = data[:, 0].copy()   # labels, kept as floats as the original loop produced

In [22]:
# Wrap the feature matrix in a DataFrame (one row per record loaded from the
# input file) and leave it as the cell's last expression so the notebook
# renders it as a table.
df = pd.DataFrame(data=X)
df


Out[22]:
0 1 2 3 4 5 6 7 8 9 ... 50 51 52 53 54 55 56 57 58 59
0 -0.376936 1.224864 0.343874 0.328454 -0.337609 1.026514 -1.330996 -0.657800 1.446077 -0.846397 ... 0.370712 -1.568165 1.021863 -1.429748 1.356966 1.372781 0.663558 0.918346 -1.311742 -1.197146
1 0.644406 0.413269 -0.862278 -1.497386 -0.421458 -0.214215 -1.292131 0.956898 -1.216140 -0.588536 ... 1.317268 -0.802528 -1.121287 0.989319 -1.109687 0.219856 0.630034 1.398140 0.085742 0.024997
2 -0.978667 -0.406232 0.822429 -1.408242 -1.676734 -0.490632 1.404598 1.168600 -1.703254 0.972810 ... -0.828783 -0.762884 1.382972 -0.869103 1.277252 -0.198967 0.966809 0.666129 0.106996 0.082715
3 -0.236537 -0.098175 0.367092 1.366860 0.498281 -0.300223 1.715634 0.902108 -0.218132 -1.430721 ... 0.068904 -1.499604 0.406744 1.289153 -1.119731 -0.995985 1.651135 -1.187098 0.025463 -0.841518
4 1.490735 1.325243 1.626829 -0.317056 -1.098990 -0.022213 0.596980 1.667339 -0.206153 -1.376627 ... 0.815065 1.029995 -1.496161 -1.167442 1.484208 -0.373020 -0.843281 -0.209794 0.137724 0.716542
5 1.643316 1.764011 0.437756 0.199310 -1.567616 -0.719117 -1.094189 1.033176 -1.377166 -0.948905 ... 0.282082 -1.368681 0.097894 -0.425231 0.575449 -0.912021 1.317424 -0.397265 0.209267 -0.045311
6 1.184702 0.788449 -1.742959 -0.963561 -1.267327 1.133356 0.206078 -0.428372 -0.979484 -1.488956 ... 0.704026 -0.037163 1.557496 1.070896 -1.713899 -1.275625 0.184349 -0.764247 0.628312 0.754082
7 -1.523098 0.995921 1.696381 -0.253934 -0.196881 0.858227 0.145158 -1.449185 -1.416422 1.316850 ... 0.430313 -0.853009 0.795066 1.647490 -0.632845 1.486350 -1.046602 0.699480 1.236125 -0.976517
8 -1.370494 -0.695313 -0.844815 -1.359326 1.522725 -1.155992 -0.086756 0.800240 -1.057486 0.173543 ... -0.287343 -0.401065 -1.717156 1.299328 0.313944 0.601898 -1.723981 0.708323 0.067532 1.184749
9 -0.824564 0.541666 0.320328 -0.362941 -0.948632 0.852671 -1.555186 0.311272 1.752519 1.540855 ... -0.865549 -1.610791 -0.019699 -1.305691 0.449620 0.588895 -0.319360 -0.527314 1.650272 -0.828984
10 -1.175847 1.214149 -1.311968 -1.577644 -0.826284 -1.297805 -1.309377 0.095824 -1.278990 0.037524 ... 1.654224 -0.358090 -0.764421 -1.489635 1.250041 1.827444 0.699256 1.010865 0.677968 -0.851046
11 0.512565 0.419255 1.208409 1.028563 1.303379 -0.178350 -0.508625 -1.423431 0.227785 1.537814 ... 0.431064 1.562407 1.503152 1.425921 0.135266 1.163599 -1.290609 0.919781 -1.090731 -0.199938
12 -0.275532 0.796881 1.130192 1.536720 0.444744 -0.056489 -2.124858 -0.310393 0.640376 0.868848 ... -1.545083 0.648490 -0.569492 1.089558 0.338879 0.974360 -0.328416 0.997804 1.288814 -0.033751
13 0.935475 0.889252 -0.857636 -1.034085 -1.185071 -1.283998 0.357616 0.420726 -1.355498 -0.658464 ... 0.298163 1.571469 -0.147964 1.070757 0.529925 1.121605 -1.474081 -0.516459 1.292328 -1.496991
14 1.163575 -0.544958 0.682241 -0.745874 0.248983 -1.367687 0.027031 -1.469814 0.789101 0.421428 ... 0.731715 0.036373 1.293886 1.185023 0.368585 1.555270 -1.348082 1.402431 -1.853915 -0.919273
15 -0.864947 1.454779 -0.821804 -1.578166 -0.820126 0.966642 -0.503750 1.014549 -0.021405 -0.131429 ... -1.235969 -0.206754 0.760426 -0.020512 0.329047 0.296567 0.836048 -1.181214 -1.600035 1.357774
16 1.373763 -1.275270 0.655254 -0.069390 -1.308730 1.251184 0.882781 -1.099739 0.709878 1.564404 ... 1.543240 -1.086674 0.372572 -0.804177 1.453642 0.667443 -0.676526 -1.154151 1.541779 -0.700399
17 -1.260423 1.466464 -0.547978 -1.683989 0.990511 0.212970 0.727742 -0.033511 -1.111252 0.218769 ... 1.326672 -0.894517 0.640421 1.546876 0.231922 1.239961 1.527314 -1.119299 -0.589238 -0.088951
18 -1.343327 0.190683 -0.915430 0.493999 0.052807 1.324177 -1.484093 -0.455041 0.660807 0.218277 ... 0.178940 -0.936452 1.045564 1.464061 1.755298 -1.615915 0.408206 1.285509 -1.354735 0.812160
19 -0.711199 -1.293888 1.394605 0.263612 0.683074 -1.536496 1.019617 0.450990 1.713260 1.232247 ... -0.147558 -0.269877 -0.143934 -0.849377 0.170459 1.407274 0.975027 1.497324 1.236393 -0.592272
20 0.998438 0.010591 0.051387 1.232272 1.247846 -0.366884 0.103733 0.901161 -0.262413 0.849449 ... 0.543340 0.471367 -1.542476 -1.160563 0.307879 -0.358559 0.457998 -0.349656 1.207160 -1.436103
21 0.530381 -0.668505 0.555085 1.175591 0.247205 -0.392256 1.260225 -1.432780 0.772346 0.151074 ... 0.101165 -1.578260 0.873666 -1.011229 0.415767 -1.132477 -1.438175 -0.733359 -1.360674 -1.170151
22 1.209642 1.358040 1.727919 -1.284312 -0.357169 1.692404 -0.509908 -0.110882 -1.318321 0.768647 ... 0.938406 -0.156383 -1.352244 1.511936 1.012590 0.514083 -1.383070 -0.902421 -1.495952 -1.446313
23 0.724569 0.992426 -1.090382 0.102660 -0.681565 -1.208842 -1.919883 0.843060 0.112178 1.492692 ... 0.354477 -0.968584 -0.313173 0.265840 0.064739 0.154859 -1.562369 0.156559 0.727412 1.121116
24 -0.268848 1.911734 -1.148335 -0.298710 -1.132138 0.203322 0.917081 1.633279 1.941566 0.067112 ... -1.162782 -1.042675 0.140033 1.706168 -0.530793 -0.059873 0.644379 -1.280824 1.248132 2.047663
25 -1.484351 -0.785003 0.182506 -1.291101 0.444604 0.249626 0.215305 -0.551545 0.206168 -0.840497 ... -0.285933 -1.176452 1.741829 1.646566 -1.149451 -1.108482 0.827004 -0.351530 0.937027 -1.145058
26 -1.269741 -1.043208 0.945561 -0.077906 -0.104779 -1.453267 1.518538 0.487803 0.196732 0.982711 ... -0.639738 0.294109 0.892575 -0.512858 -1.502267 1.455085 -1.286689 1.254258 0.301484 -0.384705
27 -1.345260 -1.192002 0.441649 1.251280 0.040124 -0.707149 1.088061 1.010849 -0.303295 -1.568245 ... 0.240246 -1.267612 -1.022462 -1.806230 -0.961823 0.722273 -0.302013 -1.524819 0.428600 -0.288878
28 1.799137 -0.550966 0.897647 -0.519890 0.216201 -1.506052 -0.138867 0.787497 0.123578 -0.330094 ... 0.513252 1.555841 -0.674315 -0.880460 0.503201 -1.669129 -0.514164 0.816919 -0.917961 -0.755838
29 0.075424 0.002187 -1.539788 1.238665 -1.498312 1.241067 0.092778 -1.128174 1.377143 -1.526472 ... -1.130493 1.004604 0.710894 -0.391308 0.854092 -0.573362 0.199686 -0.199322 -0.282081 -1.429087
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
270 0.693533 0.336936 0.314176 -0.054203 0.845034 -0.138991 0.138383 1.081019 1.320044 1.901741 ... -1.354448 -1.030520 0.462426 -1.612217 -0.925170 0.542433 0.633322 0.328062 -1.028760 -0.291640
271 1.553505 1.447669 1.691212 1.348695 1.450091 1.002545 0.500227 1.854059 1.330638 1.452158 ... -0.456903 -0.027280 -0.531971 -0.934373 -0.439200 -0.278053 -0.552549 -0.345169 0.112665 0.012664
272 0.060546 0.947275 1.767715 0.557307 1.320836 1.257246 0.254468 1.542317 0.140770 1.337181 ... -1.847822 -1.591464 0.156827 0.283388 -1.654209 -1.351658 -1.062135 -0.446831 -0.889093 -0.257052
273 0.331645 1.354371 1.080032 0.557326 0.397578 1.194133 0.517856 1.343859 0.322843 0.355059 ... -0.421454 -0.668045 -1.015987 -1.255607 -0.662398 -0.933582 -0.652651 -1.078898 -1.483230 -1.308082
274 0.853049 0.380466 1.110460 0.454124 1.147169 0.444658 0.567308 1.455448 0.436779 1.412526 ... -0.959834 -0.544383 -1.083618 -0.819071 -0.436422 -0.515447 -0.494534 -1.356151 -0.713871 -0.580091
275 1.587592 0.937045 0.203236 1.330903 1.585494 0.528824 -0.308439 -0.248508 -0.360857 1.138359 ... -0.322301 -1.599115 -0.613637 -1.071497 -0.953105 -0.080944 0.156378 -1.750969 -0.514595 0.002102
276 0.474543 1.463514 0.664824 1.390514 1.586898 1.165709 0.668478 0.596785 0.434861 0.392211 ... -1.186097 -0.759535 -1.181447 -0.469702 -1.380300 -1.634880 -0.843466 -0.861880 -0.827305 -1.401607
277 1.812364 0.983723 1.158263 1.523850 1.514194 1.396586 1.473005 1.345374 0.729658 0.943108 ... 0.034319 -0.224128 -1.314849 -0.294508 -0.851381 -0.650263 -0.594511 -0.118455 -0.792505 -0.456715
278 0.424390 0.496525 0.489231 1.390912 1.230207 1.171173 2.182840 2.245100 0.763918 1.272395 ... -1.278360 -0.738251 -1.127924 -1.344115 -0.226350 -1.475481 -0.558172 0.125024 -0.298360 0.194382
279 0.337007 1.734533 1.681828 0.494425 0.385980 1.165555 1.581474 1.150102 0.309171 1.151102 ... -0.210597 -0.148189 -2.056933 -1.149592 -1.308248 -0.398786 -1.243092 -0.647703 -0.620918 -1.285517
280 -0.508647 0.203090 -0.301114 1.097649 0.589860 1.049308 -0.025958 0.346135 0.655993 1.526098 ... -1.980461 -0.204118 -0.602978 -2.353553 -1.391410 -0.353786 -1.373363 -0.211309 -0.545701 -0.202903
281 1.300421 1.282511 0.501704 0.403188 0.936324 0.788908 0.986904 0.810403 0.327012 0.561521 ... -1.603539 -1.018826 -0.440809 -0.952545 -1.579890 -1.259458 -0.964192 -0.284063 -1.109410 -0.925963
282 0.876021 0.964189 0.980474 1.163220 1.094635 0.734070 0.268186 0.346197 1.231877 0.593070 ... -1.351913 -1.114362 -1.104122 -1.449476 -1.891623 -0.797485 -1.315502 -1.068086 -1.892658 -1.611132
283 0.118799 0.889472 0.709761 1.101568 0.758967 0.412104 1.440211 0.850789 0.776578 1.083117 ... -1.310849 -1.144690 -0.750860 -1.523696 -1.266173 -0.240687 -0.915632 -1.155849 -1.569021 -1.468318
284 0.987800 1.755330 1.273615 1.695207 1.367180 0.858529 1.893642 1.952042 0.429874 1.887508 ... -0.895954 -0.344551 -0.265248 -0.764402 -0.084556 0.049666 -0.953662 -0.685843 -0.980175 0.177196
285 1.640048 0.457685 1.027842 1.288748 1.811037 1.271028 1.520211 0.475453 1.195247 1.099978 ... -0.262813 -1.342934 -0.208529 -0.574657 -0.901820 -0.727449 -0.113487 -0.795331 -0.846138 -0.712835
286 1.820585 0.923731 1.211736 1.525482 0.779063 1.075045 1.779380 0.799778 1.503837 1.843649 ... -0.463621 -0.024226 -0.964194 -0.968628 -0.262861 -0.012003 -0.798584 -0.652119 -0.354202 -0.112791
287 0.709331 1.032259 0.824723 1.955907 0.711254 0.634161 0.374651 0.634748 1.757492 0.475327 ... -0.026973 -0.092635 -0.562431 -0.493084 -0.319240 -1.516282 -1.325764 -0.320870 -0.668209 -0.090779
288 1.898080 1.011445 1.585312 0.721400 1.718678 1.590322 0.159375 0.740917 2.084904 1.758202 ... -0.268304 -0.634834 -0.354100 -1.103825 -1.174945 -1.066292 -1.194092 -0.042007 -0.758534 -0.652242
289 -0.134751 1.689514 0.525538 1.579188 1.491051 -0.181860 0.784123 1.527880 -0.234634 1.042581 ... 0.142457 -0.272966 -0.529035 -0.427089 -1.725244 -0.035646 -1.455619 -0.333721 -1.376729 -0.274504
290 0.947571 1.045224 1.280352 1.166374 1.593510 1.001718 0.667217 1.600279 0.507579 0.704017 ... -0.058286 -0.220046 -1.188638 -0.838060 -0.496977 -0.858304 -0.439810 -0.864453 -1.381173 -1.223394
291 0.899774 0.511203 -0.064804 1.195401 0.898803 0.092994 1.062571 0.788133 1.203837 0.007135 ... -0.496199 -0.864591 -1.671988 -1.678260 -0.661987 -1.258554 -0.267578 -0.783258 -1.265154 -1.078360
292 1.466291 1.401358 0.890413 0.637260 0.540565 0.999387 0.860492 0.906255 0.428833 1.784884 ... -0.146449 -0.435276 -1.208402 -1.386830 0.056563 -0.490119 -0.889240 -0.467283 -1.328606 -1.207029
293 1.213886 1.005379 0.524086 0.516285 0.949565 1.463143 0.909321 1.295207 0.938458 1.243282 ... -0.249001 -0.722244 -1.198085 -1.481894 -1.031969 -1.479087 -1.263724 -1.248447 -1.215835 -0.297171
294 1.511380 1.105290 1.110977 0.920534 0.940863 0.063350 0.051428 1.705281 0.604451 1.417216 ... -1.045283 -0.688617 -1.509790 -0.486498 -0.564905 -1.907605 -0.557404 -0.482625 -0.221068 -1.265369
295 0.805841 0.137184 1.039280 0.175289 0.875453 0.508280 1.272193 0.107335 0.652739 1.153325 ... -0.976683 -0.849044 -1.513174 -0.636618 -1.004551 -1.107356 -0.761440 -1.223342 -1.309249 -1.273690
296 0.417909 1.544827 1.693856 1.115731 0.954563 0.625996 0.746136 1.096354 1.213999 0.819162 ... -1.465301 -0.418516 -1.150362 -0.445236 -0.359302 -1.197046 -0.079914 -0.054488 -0.751970 -0.954400
297 0.965901 0.640161 0.480857 0.462761 0.800787 0.507464 0.629943 0.361362 1.270979 0.429349 ... -1.422507 -1.498756 -1.234976 -1.537108 -0.481013 -1.469285 -1.371460 -0.683358 -1.195676 -0.803644
298 0.354971 0.109652 0.238897 0.910018 0.198167 0.360869 0.728878 1.268917 0.372435 0.752898 ... -0.655457 -1.586054 -0.925246 -0.757277 -1.570425 -1.262181 -0.518927 -1.625361 -1.765371 -1.675383
299 0.646617 0.613516 1.126664 0.979204 0.919017 1.617676 1.707450 1.409003 2.098429 0.755410 ... 0.018942 -1.022475 -0.522985 -0.875489 -0.818226 -1.444264 -0.956029 -0.023957 -0.833347 -0.261178

300 rows × 60 columns


In [ ]: