In [10]:
# Notebook-wide imports. Everything is kept in this single cell so that a
# fresh kernel only needs to run it once before any other cell.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `cross_val_score` now lives in `sklearn.model_selection`. Fall back to
# the old location only for very old installations.
try:
    from sklearn.model_selection import cross_val_score
except ImportError:
    from sklearn.cross_validation import cross_val_score
import numpy as np
from skimage.io import imread, imshow, imsave
from skimage.feature import blob_doh
from skimage.color import rgb2gray
from skimage.transform import resize
from glob import glob
import os
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import CountVectorizer
In [2]:
# Read the training CSV (path is relative to the notebook's working directory)
# into a DataFrame.
trainlabel = pd.read_csv(filepath_or_buffer='../Data/train.csv')
In [3]:
# Pull out the 'whaleID' column of the training table as a Series; this is the
# input later handed to the vectorizer.
labels = trainlabel.loc[:, 'whaleID']
In [5]:
# Create a OneHotEncoder with default settings.
# NOTE(review): `enc` is not used by any other cell visible in this section —
# confirm whether it is still needed.
enc = OneHotEncoder()
In [8]:
# Create a DictVectorizer with default settings.
# NOTE(review): `vec` is not used by any other cell visible in this section —
# confirm whether it is still needed.
vec = DictVectorizer()
In [11]:
# Token-count vectorizer used below to encode the label strings.
# min_df=1 keeps every term that occurs in at least one input string, i.e. no
# term is dropped for being rare.
vectorizer = CountVectorizer(min_df=1)
In [12]:
# Learn the vocabulary from `labels` and encode them in one step; `X` holds one
# row per entry in `labels` and one column per learned term.
# NOTE(review): presumably each whaleID is a single token, which would make
# this effectively a one-hot encoding — verify against the data.
X = vectorizer.fit_transform(labels)
In [15]:
# Show the dimensions of the encoded matrix as (rows, columns).
# The shape is read straight from `X`: converting to a dense array first
# (`X.toarray()`) would allocate rows * columns cells just to report the same
# tuple.
X.shape
Out[15]:
In [ ]: