In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Sample records used to demonstrate feature extraction and vectorization
# with DictVectorizer: one categorical field ('city') and one numeric field
# ('temperature') per record.
measurements = [
    dict(city='Dubai', temperature=33.0),
    dict(city='London', temperature=12.0),
    dict(city='San Fransisco', temperature=18.0),
]

In [3]:
from sklearn.feature_extraction import DictVectorizer

In [4]:
# Instantiate the vectorizer; no arguments are passed, so the library
# defaults apply.
vec = DictVectorizer()

In [5]:
# Fit the vectorizer on the sample records, transform them, and show the
# result as a dense array. print() is called as a function so the cell runs
# on Python 3 as well as Python 2 (a single parenthesized argument prints
# identically on both).
print(vec.fit_transform(measurements).toarray())


[[  1.   0.   0.  33.]
 [  0.   1.   0.  12.]
 [  0.   0.   1.  18.]]

In [6]:
# Show which feature each column of the transformed array corresponds to.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2,
# so prefer get_feature_names_out() when the installed version provides it
# and fall back to the legacy method on older releases.
if hasattr(vec, 'get_feature_names_out'):
    feature_names = vec.get_feature_names_out()
else:
    feature_names = vec.get_feature_names()
# Normalize to a list of plain strings so the printed form is the same
# whichever of the two methods produced the names.
print(list(map(str, feature_names)))


['city=Dubai', 'city=London', 'city=San Fransisco', 'temperature']

In [7]:
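# As the output above shows, vectorization turns each distinct value of a
# categorical field into its own new feature (e.g. 'city=Dubai'), while
# numeric fields such as 'temperature' are used directly as features.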

In [ ]: