In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
In [2]:
# Sample input for DictVectorizer: feature extraction and vectorization.
# Each record is one observation mixing a categorical field ('city')
# with a numeric field ('temperature').
measurements = [
    dict(city='Dubai', temperature=33.0),
    dict(city='London', temperature=12.0),
    dict(city='San Fransisco', temperature=18.0),
]
In [3]:
from sklearn.feature_extraction import DictVectorizer
In [4]:
# Create the vectorizer with its default options. It is still unfitted here;
# the feature mapping is learned when fit_transform() is called on the data.
vec = DictVectorizer()
In [5]:
# Fit the vectorizer on the sample records and transform them in one step,
# then convert the result to a dense array via toarray() so it prints as a
# readable matrix. Note that this call also fits `vec` as a side effect.
# print(...) with a single parenthesised argument behaves the same on
# Python 2 and Python 3, whereas the bare print statement is a SyntaxError on 3.
print(vec.fit_transform(measurements).toarray())
In [6]:
# Show the name of each output column produced by the fitted vectorizer.
# get_feature_names() was removed in scikit-learn 1.2 in favour of
# get_feature_names_out(), so prefer the new method when it exists and fall
# back to the old one on older releases. tolist() keeps the printed value a
# plain Python list, matching what the legacy method returned.
if hasattr(vec, 'get_feature_names_out'):
    feature_names = vec.get_feature_names_out().tolist()
else:
    feature_names = vec.get_feature_names()
# Parenthesised print works on both Python 2 and Python 3.
print(feature_names)
In [7]:
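# As the output above shows, vectorizing the dicts turns each distinct value of a
# categorical field into its own new feature (e.g. one column per city), while
# numeric fields such as temperature are used directly as features.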
In [ ]: