In [1]:
print 'hello world'
hello world
In [7]:
def add2(x):
y = x + 2
return y
In [8]:
i = 5
In [4]:
add2(i)
Out[4]:
7
In [5]:
square = lambda x : x*x
In [6]:
square(3)
Out[6]:
9
In [2]:
import graphlab
In [10]:
sf = graphlab.SFrame('board.csv')
Finished parsing file /Users/liubotong/Desktop/GitProject/code/ML_PY/board.csv
Parsing completed. Parsed 100 lines in 0.013068 secs.
------------------------------------------------------
Inferred types from first 100 line(s) of file as
column_type_hints=[int,str,str,float,float,float,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------
Finished parsing file /Users/liubotong/Desktop/GitProject/code/ML_PY/board.csv
Parsing completed. Parsed 942 lines in 0.015914 secs.
In [11]:
sf
Out[11]:
CityCode
BoardId
RoadName
StartLon
StartLat
EndLon
EndLat
110000
0010_103_001
北京城市内快速路
116.570446
40.041606
116.175351
39.753424
110000
0010_105_001
望京区域
116.503079
39.999301
116.440662
39.987544
110000
0010_105_002
中关村区域
116.317951
39.985662
116.305047
39.975411
110000
0010_105_003
北京CBD区域
116.49044
39.925039
116.43419
39.907748
110000
0010_105_004
西单金融街区域
116.374569
39.92415
116.355714
39.906941
110000
0010_105_005
工体三里屯区域
116.462315
39.941802
116.433189
39.922903
110000
0010_105_006
东单王府井区域
116.435875
39.924635
116.406065
39.908152
110000
0010_105_007
东直门区域
116.447498
39.951218
116.416441
39.933241
110000
0010_105_008
西直门区域
116.37341
39.948227
116.341493
39.931744
110000
0010_105_009
崇文门区域
116.436086
39.907991
116.411858
39.892235
[942 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [12]:
sf.head
Out[12]:
<bound method SFrame.head of Columns:
CityCode int
BoardId str
RoadName str
StartLon float
StartLat float
EndLon float
EndLat float
Rows: 942
Data:
+----------+--------------+------------------+------------+-----------+
| CityCode | BoardId | RoadName | StartLon | StartLat |
+----------+--------------+------------------+------------+-----------+
| 110000 | 0010_103_001 | 北京城市内快速路 | 116.570446 | 40.041606 |
| 110000 | 0010_105_001 | 望京区域 | 116.503079 | 39.999301 |
| 110000 | 0010_105_002 | 中关村区域 | 116.317951 | 39.985662 |
| 110000 | 0010_105_003 | 北京CBD区域 | 116.49044 | 39.925039 |
| 110000 | 0010_105_004 | 西单金融街区域 | 116.374569 | 39.92415 |
| 110000 | 0010_105_005 | 工体三里屯区域 | 116.462315 | 39.941802 |
| 110000 | 0010_105_006 | 东单王府井区域 | 116.435875 | 39.924635 |
| 110000 | 0010_105_007 | 东直门区域 | 116.447498 | 39.951218 |
| 110000 | 0010_105_008 | 西直门区域 | 116.37341 | 39.948227 |
| 110000 | 0010_105_009 | 崇文门区域 | 116.436086 | 39.907991 |
+----------+--------------+------------------+------------+-----------+
+------------+-----------+
| EndLon | EndLat |
+------------+-----------+
| 116.175351 | 39.753424 |
| 116.440662 | 39.987544 |
| 116.305047 | 39.975411 |
| 116.43419 | 39.907748 |
| 116.355714 | 39.906941 |
| 116.433189 | 39.922903 |
| 116.406065 | 39.908152 |
| 116.416441 | 39.933241 |
| 116.341493 | 39.931744 |
| 116.411858 | 39.892235 |
+------------+-----------+
[942 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.>
In [13]:
sf.tail
Out[13]:
<bound method SFrame.tail of Columns:
CityCode int
BoardId str
RoadName str
StartLon float
StartLat float
EndLon float
EndLat float
Rows: 942
Data:
+----------+--------------+------------------+------------+-----------+
| CityCode | BoardId | RoadName | StartLon | StartLat |
+----------+--------------+------------------+------------+-----------+
| 110000 | 0010_103_001 | 北京城市内快速路 | 116.570446 | 40.041606 |
| 110000 | 0010_105_001 | 望京区域 | 116.503079 | 39.999301 |
| 110000 | 0010_105_002 | 中关村区域 | 116.317951 | 39.985662 |
| 110000 | 0010_105_003 | 北京CBD区域 | 116.49044 | 39.925039 |
| 110000 | 0010_105_004 | 西单金融街区域 | 116.374569 | 39.92415 |
| 110000 | 0010_105_005 | 工体三里屯区域 | 116.462315 | 39.941802 |
| 110000 | 0010_105_006 | 东单王府井区域 | 116.435875 | 39.924635 |
| 110000 | 0010_105_007 | 东直门区域 | 116.447498 | 39.951218 |
| 110000 | 0010_105_008 | 西直门区域 | 116.37341 | 39.948227 |
| 110000 | 0010_105_009 | 崇文门区域 | 116.436086 | 39.907991 |
+----------+--------------+------------------+------------+-----------+
+------------+-----------+
| EndLon | EndLat |
+------------+-----------+
| 116.175351 | 39.753424 |
| 116.440662 | 39.987544 |
| 116.305047 | 39.975411 |
| 116.43419 | 39.907748 |
| 116.355714 | 39.906941 |
| 116.433189 | 39.922903 |
| 116.406065 | 39.908152 |
| 116.416441 | 39.933241 |
| 116.341493 | 39.931744 |
| 116.411858 | 39.892235 |
+------------+-----------+
[942 rows x 7 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.>
In [14]:
sf.show()
Canvas is accessible via web browser at the URL: http://localhost:55644/index.html
Opening Canvas in default web browser.
In [15]:
graphlab.canvas.set_target('ipynb')
In [19]:
sf['CityCode'].show(view='Categorical')
In [17]:
sf['CityCode']
Out[17]:
dtype: int
Rows: 942
[110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 110000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, 310000, ... ]
In [18]:
sf['BoardId']
Out[18]:
dtype: str
Rows: 942
['0010_103_001', '0010_105_001', '0010_105_002', '0010_105_003', '0010_105_004', '0010_105_005', '0010_105_006', '0010_105_007', '0010_105_008', '0010_105_009', '0010_105_010', '0010_105_011', '0010_105_012', '0010_106_001', '0010_106_002', '0010_106_003', '0010_106_004', '0010_106_005', '0010_106_006', '0010_106_007', '0010_106_008', '0010_106_009', '0010_106_010', '0010_106_011', '0010_106_012', '0010_106_013', '0010_106_014', '0010_106_015', '0010_106_016', '0010_106_017', '0010_106_018', '0010_106_019', '0010_106_020', '0010_106_021', '0010_106_022', '0010_106_023', '0010_106_024', '0010_106_025', '0010_106_026', '0010_106_027', '0010_106_028', '0010_106_029', '0010_106_030', '0010_106_031', '0010_106_032', '0010_106_033', '0010_106_034', '0010_106_035', '0010_106_036', '0010_106_037', '0010_106_038', '0010_106_039', '0010_106_040', '0010_106_041', '0010_106_042', '0010_106_043', '0010_106_044', '0010_106_045', '0010_106_046', '0010_106_047', '0010_106_048', '0021_103_001', '0021_105_001', '0021_105_002', '0021_105_003', '0021_105_004', '0021_105_005', '0021_105_006', '0021_105_007', '0021_105_008', '0021_106_001', '0021_106_002', '0021_106_003', '0021_106_004', '0021_106_005', '0021_106_006', '0021_106_007', '0021_106_008', '0021_106_009', '0021_106_010', '0021_106_011', '0021_106_012', '0021_106_013', '0021_106_014', '0021_106_015', '0021_106_016', '0021_106_017', '0021_106_018', '0021_106_019', '0021_106_020', '0021_106_021', '0021_106_022', '0021_106_023', '0021_106_024', '0021_106_025', '0021_106_026', '0021_106_027', '0021_106_028', '0021_106_029', '0021_106_030', ... ]
In [20]:
sf['CityCode'].mean()
Out[20]:
331469.42675159225
In [21]:
sf['CityCode'].max()
Out[21]:
610100
In [36]:
sf['StartLocation'] = str(sf['StartLon']) + ',' + sf['RoadName']
In [37]:
sf
Out[37]:
CityCode
BoardId
RoadName
StartLon
StartLat
EndLon
EndLat
110000
0010_103_001
北京城市内快速路
116.570446
40.041606
116.175351
39.753424
110000
0010_105_001
望京区域
116.503079
39.999301
116.440662
39.987544
110000
0010_105_002
中关村区域
116.317951
39.985662
116.305047
39.975411
110000
0010_105_003
北京CBD区域
116.49044
39.925039
116.43419
39.907748
110000
0010_105_004
西单金融街区域
116.374569
39.92415
116.355714
39.906941
110000
0010_105_005
工体三里屯区域
116.462315
39.941802
116.433189
39.922903
110000
0010_105_006
东单王府井区域
116.435875
39.924635
116.406065
39.908152
110000
0010_105_007
东直门区域
116.447498
39.951218
116.416441
39.933241
110000
0010_105_008
西直门区域
116.37341
39.948227
116.341493
39.931744
110000
0010_105_009
崇文门区域
116.436086
39.907991
116.411858
39.892235
StartLocation
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[116.570446, 116.503079,
116.317951, 116.49044, ...
[942 rows x 8 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
In [38]:
sf['CityCode'] +2
Out[38]:
dtype: int
Rows: 942
[110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 110002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, 310002, ... ]
In [39]:
sf['CityCode'] * sf['CityCode']
Out[39]:
dtype: int
Rows: 942
[12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 12100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, 96100000000, ... ]
In [ ]:
Content source: LTMana/code
Similar notebooks: