In [4]:
file = open('/Users/HansZeng/Desktop/watermelon-dataset.txt')
lines = file.readlines()

for line in lines:
    print line


编号,色泽,根蒂,敲声,纹理,脐部,触感,密度,含糖率,好瓜

1,青绿,蜷缩,浊响,清晰,凹陷,硬滑,0.697,0.46,是

2,乌黑,蜷缩,沉闷,清晰,凹陷,硬滑,0.774,0.376,是

3,乌黑,蜷缩,浊响,清晰,凹陷,硬滑,0.634,0.264,是

4,青绿,蜷缩,沉闷,清晰,凹陷,硬滑,0.608,0.318,是

5,浅白,蜷缩,浊响,清晰,凹陷,硬滑,0.556,0.215,是

6,青绿,稍蜷,浊响,清晰,稍凹,软粘,0.403,0.237,是

7,乌黑,稍蜷,浊响,稍糊,稍凹,软粘,0.481,0.149,是

8,乌黑,稍蜷,浊响,清晰,稍凹,硬滑,0.437,0.211,是

9,乌黑,稍蜷,沉闷,稍糊,稍凹,硬滑,0.666,0.091,否

10,青绿,硬挺,清脆,清晰,平坦,软粘,0.243,0.267,否

11,浅白,硬挺,清脆,模糊,平坦,硬滑,0.245,0.057,否

12,浅白,蜷缩,浊响,模糊,平坦,软粘,0.343,0.099,否

13,青绿,稍蜷,浊响,稍糊,凹陷,硬滑,0.639,0.161,否

14,浅白,稍蜷,沉闷,稍糊,凹陷,硬滑,0.657,0.198,否

15,乌黑,稍蜷,浊响,清晰,稍凹,软粘,0.36,0.37,否

16,浅白,蜷缩,浊响,模糊,平坦,硬滑,0.593,0.042,否

17,青绿,蜷缩,沉闷,稍糊,稍凹,硬滑,0.719,0.103,否


In [11]:



Out[11]:
True

In [67]:
import pandas as pd

def get_watermelon_dataset():
    file = open('/Users/HansZeng/Desktop/watermelon-dataset.txt')
    lines = file.readlines()
    m = []
    i = 0
    for line in lines:
        m.append(line.split(","))
        i = i + 1    
    df = pd.DataFrame(m[1:])
    l1 = ['是']*8
    l2 = ['否']*9
    l1.extend(l2)
    df[9] = l1
    df.columns = ['编号','色泽','根蒂','敲声','纹理','脐部','触感','密度','含糖率','好瓜']
    return df

get_watermelon_dataset()


Out[67]:
编号 色泽 根蒂 敲声 纹理 脐部 触感 密度 含糖率 好瓜
0 1 青绿 蜷缩 浊响 清晰 凹陷 硬滑 0.697 0.46
1 2 乌黑 蜷缩 沉闷 清晰 凹陷 硬滑 0.774 0.376
2 3 乌黑 蜷缩 浊响 清晰 凹陷 硬滑 0.634 0.264
3 4 青绿 蜷缩 沉闷 清晰 凹陷 硬滑 0.608 0.318
4 5 浅白 蜷缩 浊响 清晰 凹陷 硬滑 0.556 0.215
5 6 青绿 稍蜷 浊响 清晰 稍凹 软粘 0.403 0.237
6 7 乌黑 稍蜷 浊响 稍糊 稍凹 软粘 0.481 0.149
7 8 乌黑 稍蜷 浊响 清晰 稍凹 硬滑 0.437 0.211
8 9 乌黑 稍蜷 沉闷 稍糊 稍凹 硬滑 0.666 0.091
9 10 青绿 硬挺 清脆 清晰 平坦 软粘 0.243 0.267
10 11 浅白 硬挺 清脆 模糊 平坦 硬滑 0.245 0.057
11 12 浅白 蜷缩 浊响 模糊 平坦 软粘 0.343 0.099
12 13 青绿 稍蜷 浊响 稍糊 凹陷 硬滑 0.639 0.161
13 14 浅白 稍蜷 沉闷 稍糊 凹陷 硬滑 0.657 0.198
14 15 乌黑 稍蜷 浊响 清晰 稍凹 软粘 0.36 0.37
15 16 浅白 蜷缩 浊响 模糊 平坦 硬滑 0.593 0.042
16 17 青绿 蜷缩 沉闷 稍糊 稍凹 硬滑 0.719 0.103

In [59]:
pwd


Out[59]:
u'/Users/HansZeng/Desktop/machine-learning-zhzhou'

In [ ]: