In [1]:
print 'hello world'


hello world

method in python


In [7]:
def add2(x):
    y = x + 2
    return y

In [8]:
i = 5

In [4]:
add2(i)


Out[4]:
7

In [5]:
square = lambda x : x*x

In [6]:
square(3)


Out[6]:
9

使用GraphLab Create


In [2]:
import graphlab

In [10]:
sf = graphlab.SFrame('board.csv')


Finished parsing file /Users/liubotong/Desktop/GitProject/code/ML_PY/board.csv
Parsing completed. Parsed 100 lines in 0.013068 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,str,float,float,float,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file /Users/liubotong/Desktop/GitProject/code/ML_PY/board.csv
Parsing completed. Parsed 942 lines in 0.015914 secs.

In [11]:
sf


Out[11]:
CityCode BoardId RoadName StartLon StartLat EndLon EndLat
110000 0010_103_001 北京城市内快速路 116.570446 40.041606 116.175351 39.753424
110000 0010_105_001 望京区域 116.503079 39.999301 116.440662 39.987544
110000 0010_105_002 中关村区域 116.317951 39.985662 116.305047 39.975411
110000 0010_105_003 北京CBD区域 116.49044 39.925039 116.43419 39.907748
110000 0010_105_004 西单金融街区域 116.374569 39.92415 116.355714 39.906941
110000 0010_105_005 工体三里屯区域 116.462315 39.941802 116.433189 39.922903
110000 0010_105_006 东单王府井区域 116.435875 39.924635 116.406065 39.908152
110000 0010_105_007 东直门区域 116.447498 39.951218 116.416441 39.933241
110000 0010_105_008 西直门区域 116.37341 39.948227 116.341493 39.931744
110000 0010_105_009 崇文门区域 116.436086 39.907991 116.411858 39.892235
[942 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.

In [12]:
sf.head


Out[12]:
<bound method SFrame.head of Columns:
	CityCode	int
	BoardId	str
	RoadName	str
	StartLon	float
	StartLat	float
	EndLon	float
	EndLat	float

Rows: 942

Data:
+----------+--------------+------------------+------------+-----------+
| CityCode |   BoardId    |     RoadName     |  StartLon  |  StartLat |
+----------+--------------+------------------+------------+-----------+
|  110000  | 0010_103_001 | 北京城市内快速路 | 116.570446 | 40.041606 |
|  110000  | 0010_105_001 |     望京区域     | 116.503079 | 39.999301 |
|  110000  | 0010_105_002 |    中关村区域    | 116.317951 | 39.985662 |
|  110000  | 0010_105_003 |   北京CBD区域    | 116.49044  | 39.925039 |
|  110000  | 0010_105_004 |  西单金融街区域  | 116.374569 |  39.92415 |
|  110000  | 0010_105_005 |  工体三里屯区域  | 116.462315 | 39.941802 |
|  110000  | 0010_105_006 |  东单王府井区域  | 116.435875 | 39.924635 |
|  110000  | 0010_105_007 |    东直门区域    | 116.447498 | 39.951218 |
|  110000  | 0010_105_008 |    西直门区域    | 116.37341  | 39.948227 |
|  110000  | 0010_105_009 |    崇文门区域    | 116.436086 | 39.907991 |
+----------+--------------+------------------+------------+-----------+
+------------+-----------+
|   EndLon   |   EndLat  |
+------------+-----------+
| 116.175351 | 39.753424 |
| 116.440662 | 39.987544 |
| 116.305047 | 39.975411 |
| 116.43419  | 39.907748 |
| 116.355714 | 39.906941 |
| 116.433189 | 39.922903 |
| 116.406065 | 39.908152 |
| 116.416441 | 39.933241 |
| 116.341493 | 39.931744 |
| 116.411858 | 39.892235 |
+------------+-----------+
[942 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.>

In [13]:
sf.tail


Out[13]:
<bound method SFrame.tail of Columns:
	CityCode	int
	BoardId	str
	RoadName	str
	StartLon	float
	StartLat	float
	EndLon	float
	EndLat	float

Rows: 942

Data:
+----------+--------------+------------------+------------+-----------+
| CityCode |   BoardId    |     RoadName     |  StartLon  |  StartLat |
+----------+--------------+------------------+------------+-----------+
|  110000  | 0010_103_001 | 北京城市内快速路 | 116.570446 | 40.041606 |
|  110000  | 0010_105_001 |     望京区域     | 116.503079 | 39.999301 |
|  110000  | 0010_105_002 |    中关村区域    | 116.317951 | 39.985662 |
|  110000  | 0010_105_003 |   北京CBD区域    | 116.49044  | 39.925039 |
|  110000  | 0010_105_004 |  西单金融街区域  | 116.374569 |  39.92415 |
|  110000  | 0010_105_005 |  工体三里屯区域  | 116.462315 | 39.941802 |
|  110000  | 0010_105_006 |  东单王府井区域  | 116.435875 | 39.924635 |
|  110000  | 0010_105_007 |    东直门区域    | 116.447498 | 39.951218 |
|  110000  | 0010_105_008 |    西直门区域    | 116.37341  | 39.948227 |
|  110000  | 0010_105_009 |    崇文门区域    | 116.436086 | 39.907991 |
+----------+--------------+------------------+------------+-----------+
+------------+-----------+
|   EndLon   |   EndLat  |
+------------+-----------+
| 116.175351 | 39.753424 |
| 116.440662 | 39.987544 |
| 116.305047 | 39.975411 |
| 116.43419  | 39.907748 |
| 116.355714 | 39.906941 |
| 116.433189 | 39.922903 |
| 116.406065 | 39.908152 |
| 116.416441 | 39.933241 |
| 116.341493 | 39.931744 |
| 116.411858 | 39.892235 |
+------------+-----------+
[942 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.>

GraphLab Canvas


In [14]:
sf.show()


Canvas is accessible via web browser at the URL: http://localhost:55644/index.html
Opening Canvas in default web browser.

In [15]:
graphlab.canvas.set_target('ipynb')

In [19]:
sf['CityCode'].show(view='Categorical')


操作列数据


In [17]:
sf['CityCode']


Out[17]:
dtype: int
Rows: 942
[110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, ... ]

In [18]:
sf['BoardId']


Out[18]:
dtype: str
Rows: 942
['0010_103_001', '0010_105_001', '0010_105_002', '0010_105_003', '0010_105_004', '0010_105_005', '0010_105_006', '0010_105_007', '0010_105_008', '0010_105_009', '0010_105_010', '0010_105_011', '0010_105_012', '0010_106_001', '0010_106_002', '0010_106_003', '0010_106_004', '0010_106_005', '0010_106_006', '0010_106_007', '0010_106_008', '0010_106_009', '0010_106_010', '0010_106_011', '0010_106_012', '0010_106_013', '0010_106_014', '0010_106_015', '0010_106_016', '0010_106_017', '0010_106_018', '0010_106_019', '0010_106_020', '0010_106_021', '0010_106_022', '0010_106_023', '0010_106_024', '0010_106_025', '0010_106_026', '0010_106_027', '0010_106_028', '0010_106_029', '0010_106_030', '0010_106_031', '0010_106_032', '0010_106_033', '0010_106_034', '0010_106_035', '0010_106_036', '0010_106_037', '0010_106_038', '0010_106_039', '0010_106_040', '0010_106_041', '0010_106_042', '0010_106_043', '0010_106_044', '0010_106_045', '0010_106_046', '0010_106_047', '0010_106_048', '0021_103_001', '0021_105_001', '0021_105_002', '0021_105_003', '0021_105_004', '0021_105_005', '0021_105_006', '0021_105_007', '0021_105_008', '0021_106_001', '0021_106_002', '0021_106_003', '0021_106_004', '0021_106_005', '0021_106_006', '0021_106_007', '0021_106_008', '0021_106_009', '0021_106_010', '0021_106_011', '0021_106_012', '0021_106_013', '0021_106_014', '0021_106_015', '0021_106_016', '0021_106_017', '0021_106_018', '0021_106_019', '0021_106_020', '0021_106_021', '0021_106_022', '0021_106_023', '0021_106_024', '0021_106_025', '0021_106_026', '0021_106_027', '0021_106_028', '0021_106_029', '0021_106_030', ... ]

In [20]:
sf['CityCode'].mean()


Out[20]:
331469.42675159225

In [21]:
sf['CityCode'].max()


Out[21]:
610100

增加新列


In [36]:
sf['StartLocation'] = str(sf['StartLon']) + ',' + sf['RoadName']

In [37]:
sf


Out[37]:
CityCode BoardId RoadName StartLon StartLat EndLon EndLat
110000 0010_103_001 北京城市内快速路 116.570446 40.041606 116.175351 39.753424
110000 0010_105_001 望京区域 116.503079 39.999301 116.440662 39.987544
110000 0010_105_002 中关村区域 116.317951 39.985662 116.305047 39.975411
110000 0010_105_003 北京CBD区域 116.49044 39.925039 116.43419 39.907748
110000 0010_105_004 西单金融街区域 116.374569 39.92415 116.355714 39.906941
110000 0010_105_005 工体三里屯区域 116.462315 39.941802 116.433189 39.922903
110000 0010_105_006 东单王府井区域 116.435875 39.924635 116.406065 39.908152
110000 0010_105_007 东直门区域 116.447498 39.951218 116.416441 39.933241
110000 0010_105_008 西直门区域 116.37341 39.948227 116.341493 39.931744
110000 0010_105_009 崇文门区域 116.436086 39.907991 116.411858 39.892235
StartLocation
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[942 rows x 8 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.


In [38]:
sf['CityCode'] +2


Out[38]:
dtype: int
Rows: 942
[110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, ... ]

In [39]:
sf['CityCode'] * sf['CityCode']


Out[39]:
dtype: int
Rows: 942
[12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, ... ]

应用apply函数来转换数据


In [ ]: