In [1]:
# The integers 0 through 9, used as a small input for the Spark smoke test.
list_a = list(range(10))
In [2]:
# Distribute list_a as an RDD, add one to every element, then sum the results.
(sc.parallelize(list_a)
   .map(lambda value: value + 1)
   .reduce(lambda total, value: total + value))
Out[2]:
55
In [3]:
from pyspark.sql import HiveContext, Row
# Hive-enabled SQL entry point wrapping the SparkContext `sc`.
# NOTE(review): `sc` is not created anywhere in this notebook; presumably the
# PySpark kernel injects it at startup -- confirm before running elsewhere.
sqlContext = HiveContext(sc)
In [4]:
# Run the `show tables` statement through the Hive context. The cell displays
# only the returned DataFrame's repr (its column names and types), not the
# rows themselves; call .show() on the result to print the table list.
sqlContext.sql("show tables")
Out[4]:
DataFrame[tableName: string, isTemporary: boolean]
In [5]:
# Sample product-revenue records, one Row per product.
# Revenue is an integer rather than a string so that Spark infers a numeric
# column: string revenues compare lexically and cannot be summed or averaged.
# Note that `df` is an RDD of Rows; the DataFrame itself is `productRevenue_df`.
df = sc.parallelize([
    Row(product="thin", category="cellphone", rev=6000),
    Row(product="normal", category="tablet", rev=1500),
    Row(product="mini", category="tablet", rev=5500),
    Row(product="ultra thin", category="cellphone", rev=5000),
    Row(product="very thing", category="cellphone", rev=6000),
    Row(product="big", category="tablet", rev=2500),
    Row(product="bendable", category="cellphone", rev=3000),
    Row(product="foldable", category="cellphone", rev=3000),
    Row(product="pro", category="tablet", rev=4500),
    Row(product="pro2", category="tablet", rev=6500),
])

# Let Spark infer the schema from the Row fields, then print the rows.
productRevenue_df = sqlContext.createDataFrame(df)
productRevenue_df.show()
+---------+----------+----+
| category| product| rev|
+---------+----------+----+
|cellphone| thin|6000|
| tablet| normal|1500|
| tablet| mini|5500|
|cellphone|ultra thin|5000|
|cellphone|very thing|6000|
| tablet| big|2500|
|cellphone| bendable|3000|
|cellphone| foldable|3000|
| tablet| pro|4500|
| tablet| pro2|6500|
+---------+----------+----+
In [16]:
# Shell escape: print the kernel's current working directory.
!pwd
/home
In [17]:
# Shell escape: list the files in the kernel's current working directory.
!ls
Untitled.ipynb derby.log metastore_db
In [8]:
from sklearn.datasets import make_blobs
from pyspark.mllib.clustering import KMeans
In [9]:
# Generate 50 synthetic points with 20 features each, drawn around 5 cluster
# centers. X holds the points and y the generating cluster label of each one.
# Arguments are passed by keyword: newer scikit-learn releases reject
# `centers` as a positional argument.
# NOTE(review): no random_state is given, so every run draws different points.
X, y = make_blobs(n_samples=50, n_features=20, centers=5)
In [10]:
# Dump the generated points. The parenthesised form prints the same text under
# Python 2 and is also valid on Python 3, where the bare `print X` statement
# is a SyntaxError.
print(X)
[[ 8.79265169 8.69813553 7.26584523 -0.95237571 7.66482599
-1.84022236 4.62125024 2.5017508 -10.41165513 2.09050148
3.17796425 7.85348358 10.24431579 -1.88136637 4.73685396
-4.22206454 1.3553198 8.96417314 0.96634182 -0.1082746 ]
[ -0.57021603 -2.95011861 2.8014989 -3.77173248 2.4647076
5.81325698 1.42435662 -7.04384883 -3.05966505 1.6740405
-9.84026239 8.24840516 -5.1131863 -2.22311618 0.95482552
-3.38819958 -2.10016993 2.59753474 1.76189002 9.40326368]
[ -2.28491968 4.78324522 8.63483306 -4.35186232 -4.60251651
-7.03618689 -8.32221764 0.66676661 7.53789095 -0.7162713
-10.19716729 -11.58301531 -9.63346717 1.21774221 -3.09186347
-1.61226172 -3.38826404 -5.90932636 -0.85335682 -8.50578432]
[ -1.46765634 -6.44533401 3.57919445 -5.77122572 1.09529359
6.27581505 0.95850395 -4.09758186 -4.37011787 0.27320859
-9.57875777 8.62468737 -5.50143944 -1.92456555 -0.96637844
-1.17558067 -0.5957673 2.87336084 0.24401986 8.87688196]
[ -9.49650876 -1.0219336 5.35451856 -7.49414541 -9.12113299
-3.84506743 -2.52210559 -8.34878782 -5.95092744 8.55325433
-6.49382995 9.91160201 -8.59122399 -1.56711779 4.7464535
0.10183576 -2.61129868 -8.5574042 -1.65643489 3.69093277]
[ -9.16767611 -0.60074617 8.46580961 -7.68791995 -9.51274125
-4.04907457 -3.95295082 -8.44538501 -5.80830824 7.51824738
-4.31701729 10.25830408 -9.66835164 -2.20636504 4.80645569
-0.28075858 -2.46094482 -9.83542093 -3.39974554 2.6094632 ]
[ -5.90885199 -5.54495586 3.82752788 5.47445191 -7.56901466
6.17678723 -1.67866919 -8.18495808 4.4351337 3.01243834
-9.64523205 6.18695345 6.18979228 5.44298096 -6.51295969
-3.63470336 -8.37506596 -8.80310855 -5.08267496 -10.22274691]
[-10.09062298 -0.35889548 8.32643481 -6.77082798 -8.61285419
-2.11910124 -3.17342421 -6.94638579 -5.53397978 7.97891402
-3.80749304 8.75537255 -9.07804589 -4.44827072 5.16998281
-0.23544812 -3.15845139 -10.1293545 -3.40367996 2.75186944]
[-10.41115107 -0.07056801 6.86882724 -8.71377695 -8.32764693
-3.4462044 -3.98585553 -8.88198806 -5.91159088 7.21272186
-3.70045788 10.61664859 -8.02765378 -3.49693279 4.83534002
1.23359632 -2.74023638 -9.27244076 -2.15486888 3.4196431 ]
[ -8.44966524 0.15222009 5.96130028 -6.23397972 -7.9689413
-2.85649827 -3.14723019 -9.74077064 -8.66686057 7.3979211
-3.58851647 8.53668524 -9.38493373 -3.42164842 4.92968908
1.02644333 -1.18034366 -9.59222518 -1.65042312 1.98760466]
[ -1.26695207 -5.1749955 2.25787902 -5.70478069 2.88745823
5.13921971 1.58049331 -2.88741482 -2.19152852 -0.57132348
-8.52296615 10.57857894 -4.72710729 -4.52792417 -0.93353762
-1.67313032 -3.61894688 3.93832621 0.97728128 9.79862303]
[ -1.73310782 3.12935171 8.869709 -2.16530809 -5.27978313
-8.22934144 -7.05111819 3.08486652 6.54123198 -1.14497731
-9.58165071 -8.69839638 -8.93336014 4.45107776 -1.79777995
-4.88620719 -3.80454933 -5.04569698 -3.04731689 -8.22577033]
[ -2.09816579 -3.94974648 3.84203159 -4.67855983 1.66200019
5.62767407 0.98819188 -3.35296529 -5.24095731 1.23629149
-6.2691853 9.91397356 -4.09764593 -1.68827378 2.09885207
-1.13690589 -3.42814462 3.34322359 0.59152802 9.48704993]
[ -5.71423354 -4.90467266 5.23284865 6.56889188 -7.31557488
3.77010156 1.62994422 -8.20567917 3.71870688 3.33084662
-8.82169167 6.38913688 6.5786957 8.47159069 -4.07719953
-2.44813625 -8.95443938 -9.83878637 -4.08999396 -7.91651951]
[ -5.13309635 -5.61619555 4.56368682 5.69148863 -6.62759493
4.81955263 0.49868692 -7.71699533 5.03595666 3.37882384
-10.88285834 7.28788877 7.34980894 7.5980248 -3.53495948
-0.94232066 -9.42382332 -8.04659729 -4.38500482 -9.30833585]
[ -8.89091579 0.0235643 6.34667684 -9.5793069 -8.87082747
-2.40385377 -1.69359962 -6.92598514 -7.17032059 7.58525584
-3.59895184 10.49636941 -10.05270864 -2.02568144 5.48475718
3.71408799 -1.96145255 -7.50221358 -1.528048 3.93092539]
[-11.01809206 0.65005831 6.14247758 -7.78617531 -8.85514435
-2.7158194 -2.93945877 -9.03006019 -9.42412677 5.42537483
-5.38538841 10.42461165 -9.02806649 -3.12767159 4.89396746
1.58902233 -2.07727558 -10.35575586 -1.4446981 2.77487666]
[ -0.74485868 -6.18602552 3.16073056 -5.98478001 2.03001923
5.41605093 2.78935152 -5.26648786 -3.27973435 -0.75328004
-10.11021545 8.59663771 -4.33138411 -0.47283699 0.46859837
-3.64985693 -1.84531066 4.7089873 3.06787167 6.90065742]
[ -3.06584439 4.53349698 10.1336167 -3.74905318 -6.95655664
-9.81691591 -7.2474322 1.42385697 9.08973736 0.77069211
-10.19619408 -9.07902001 -9.55303383 2.67772765 -1.12306886
-3.06078111 -3.54054204 -5.29597035 -0.92304292 -9.83068289]
[ -2.05880127 2.99862419 7.57745671 -3.70846263 -5.84651158
-8.50470297 -8.05605841 1.52883426 5.36114513 -1.41274885
-9.19053076 -7.95406859 -8.18249091 4.64107126 -2.57381654
-6.32837147 -4.20825263 -4.93159923 -0.61012825 -10.25227777]
[ -3.22909228 -6.05472756 2.24222264 7.14861504 -6.90604733
7.02793092 0.64583093 -10.41601241 6.74792737 3.19870211
-9.37468184 7.1796093 7.76001188 8.42036659 -4.67752832
-2.72478464 -11.57321487 -9.90822188 -4.63559988 -9.14452738]
[ -0.32848453 3.21469104 8.30582211 -3.40132156 -5.61302158
-8.28486755 -6.43325172 2.7060666 8.84514569 -0.78917256
-10.17674458 -9.10960257 -7.3373645 3.53777927 -0.21122767
-4.66223103 -4.3170636 -6.74353318 -2.01765468 -9.05915 ]
[ 8.87659059 10.02315524 7.63348384 -0.78999537 6.15004821
-0.80182809 3.69219647 4.51633778 -11.05425249 0.65739829
3.29974463 7.66390667 8.03632908 -2.13364756 7.30530683
-3.06916852 2.77256792 9.55910139 2.95371809 0.17630935]
[ -1.55958542 -5.58975913 3.16365453 -7.36712016 1.24880887
7.17323695 1.15015381 -5.6185459 -4.6739325 -0.75362846
-8.38835195 9.18339151 -3.20357234 -3.57954536 0.89379494
-2.88340696 -1.11358099 4.07314025 0.92948887 10.99266101]
[ -8.9643237 -0.75453883 8.50413004 -7.02518689 -9.98634559
-0.82849811 -4.53274002 -9.09084082 -6.6357981 6.83042158
-4.42255934 9.00979303 -9.3140614 -4.14881835 5.6429822
0.09036695 -1.23564314 -9.11116981 -2.82218242 3.26987051]
[ 6.12513995 8.99674188 9.65724553 -1.21327036 5.97868271
0.58261886 3.23666666 3.45474845 -10.39400999 1.72747319
1.04407464 8.52157082 8.3749836 -1.49563235 5.21648981
-5.11484133 1.59931938 9.36096256 2.02000206 -0.25519271]
[ 0.42800271 -4.75157392 0.67068223 -6.63799638 2.30012038
5.88094233 0.17829208 -4.89444755 -3.98047312 0.14807241
-8.49625553 9.03629778 -4.19923372 -2.32524137 0.24132932
1.32375501 -3.56663785 3.5635277 0.47411429 8.68253044]
[ -1.97170984 4.73907768 8.5960669 -2.47929819 -5.67235034
-8.39811441 -5.6700071 2.95260447 6.45600225 -0.64717907
-7.93264074 -9.06398649 -7.22674135 2.16432398 -2.51049852
-5.49224367 -3.72170812 -5.5418101 -3.11545286 -9.43508282]
[ 5.5404227 10.63362173 6.15231789 -0.07974176 5.93787708
-0.73781044 5.14992505 3.48997197 -8.93728186 2.32245306
3.69498089 8.35523137 10.30829502 -2.70288028 7.24147297
-3.36114892 1.9437317 9.31858424 0.94674363 -0.44561825]
[-10.63279579 -0.66113613 5.69772772 -8.40089973 -7.59638738
-2.373329 -5.18061728 -8.07305814 -7.20407407 7.14676877
-3.29742097 8.01483303 -8.96479687 -2.44997393 6.16384879
2.30791879 -2.31465654 -9.68337043 -2.88746672 4.2631566 ]
[ -3.69508504 3.82906398 9.07353261 -3.33653134 -6.01945723
-8.65407873 -6.55893289 1.23018717 7.34892996 0.0579159
-9.03702647 -7.44114238 -9.73628371 2.89247218 -0.89702567
-3.58736906 -3.86314433 -5.30858454 0.7791437 -9.24063714]
[ 8.08644222 8.87941365 9.55340897 -1.30607678 4.9260196
-3.6240853 5.15678805 2.82427402 -9.32367323 1.47574584
2.14141527 8.08307953 9.40738853 -3.88530459 5.98481482
-4.53187247 0.45451155 10.25104408 0.25276069 -1.06347681]
[ -2.74628449 3.31815634 8.14577466 -2.06380547 -7.28394
-8.83086112 -7.16667963 3.31967965 6.82192358 0.16597437
-11.24025004 -9.41793648 -9.04037246 4.14180995 0.84967642
-6.27998076 -2.75612904 -6.74960893 0.89383426 -10.0398925 ]
[ -5.42344655 -5.32197792 4.24357795 6.08716412 -8.04891978
7.03299931 -0.16837288 -8.42380817 5.71020325 3.93160216
-8.68375605 7.9532002 6.29923985 6.1721293 -2.40228886
-1.17646738 -9.20062379 -9.05222142 -4.14841751 -9.44238908]
[ -9.69575602 -1.26165659 7.17574298 -7.44571536 -9.20489898
-1.05552674 -2.24857561 -5.77235264 -4.90003167 7.35809601
-4.37268835 7.73192904 -9.91806323 -0.95139636 5.61510012
0.04955683 -2.37936598 -9.32269386 -1.228146 4.60322034]
[ -7.06918785 -4.53619792 3.49272374 7.30552997 -8.42555883
7.61654036 -0.41245482 -8.375508 5.04293329 3.47474554
-9.71920488 6.42720035 5.97988831 7.16019699 -1.76321135
-1.68806574 -8.29167598 -10.0597838 -5.23408344 -9.84310148]
[ -2.05858916 -5.93886336 3.59727058 -5.48880491 1.75878514
6.3795424 -0.16205924 -5.66474524 -4.29749796 1.45742668
-9.18978875 8.31342714 -4.39611176 -2.36679532 -0.17316643
-1.29472262 -2.03622453 3.16314402 -0.03492922 9.23281076]
[ 7.92722089 8.04099126 5.82785243 0.20723055 5.22372309
-1.29561513 4.43317771 1.40867907 -8.20111531 1.80932808
1.89302643 10.23653117 9.35520272 -3.79136589 6.94000326
-5.19072538 0.82532701 9.37551211 2.82581378 -0.31815209]
[ 7.28484896 9.83327378 7.97406688 -2.15559887 6.72369761
-2.07408644 4.64012786 2.81919429 -9.79647173 1.282567
2.67484452 9.44119935 8.62662785 -2.42891041 7.10834997
-4.36657481 4.29044979 8.4482797 3.54488407 1.36132838]
[ -2.98063191 4.074572 8.98369123 -5.03446163 -6.15695148
-9.32677563 -7.07704334 3.45671378 7.75018594 -0.24121346
-12.86203222 -11.01295 -7.74739849 3.5447667 -0.40269482
-5.35323143 -4.33959903 -5.31024094 0.93685153 -10.66204779]
[ -5.32426187 -5.79372405 4.39744709 5.20303173 -8.83290436
5.01711329 -0.56582087 -10.00838395 3.94702946 1.99261785
-10.42930071 6.23734522 7.1082643 6.75586154 -4.02747233
-1.89817641 -9.73914688 -10.88380192 -6.32326931 -8.66369578]
[ -0.80088647 -6.59404667 4.96584639 -6.5966571 2.4049069
4.51482955 3.66068208 -3.98320439 -4.61739624 -0.913527 -7.262703
7.93022824 -3.92490351 -3.82614015 0.231209 0.22197534
-2.53968922 3.95178895 2.23285485 8.38337081]
[ -5.20347485 -4.76200642 4.78065089 7.80650627 -6.99190434
6.60218947 -0.75750626 -9.93655922 3.55925751 0.18589963
-10.99735713 5.62395171 9.54326579 7.56091331 -5.92309349
-2.0497414 -8.24049052 -10.24857496 -4.82350147 -8.9755812 ]
[ 6.35284627 9.36685896 7.14045089 -2.95191837 4.50853949
-3.85719013 4.67085388 2.95068103 -7.33265801 0.41501278
2.21337601 9.89184692 10.20355969 -2.09113752 5.75919196
-3.96184032 0.56571767 8.68900251 0.8324208 -0.94634905]
[ 7.00613987 9.63080174 9.39265305 -0.99138488 6.53977301
-1.86539383 5.08117662 1.57432817 -9.66341266 1.57295707
3.40884724 9.07733361 7.79035121 -0.32150351 6.94189509
-3.94901528 2.36947411 11.08678972 2.27115378 1.08629576]
[ -3.67115226 2.94766368 8.70206218 -4.21991573 -6.60319089
-7.05401522 -7.47685667 2.03894497 8.76000929 -1.76306672
-10.59755549 -8.02661475 -6.56361209 0.81500712 -2.06300179
-5.29620904 -2.62132026 -3.72818738 0.13529043 -8.75662355]
[ -4.4385462 -5.86517207 4.22034216 6.81720356 -6.3163222
7.47084369 0.69087844 -9.51944839 5.68193768 3.74646092
-10.13513082 6.69211503 6.30429168 7.98231544 -4.01880065
-1.54689604 -8.92889236 -10.49976794 -5.70977596 -10.91617866]
[ 7.40529978 9.21222707 6.76048215 -2.18044132 5.33469714
-2.35195585 5.1215056 3.6254503 -10.23488585 2.88027529
2.76962641 10.17128967 8.91038073 -1.99085837 7.11521375
-5.0265937 0.78647388 10.48042086 0.65197898 -1.77420073]
[ 0.10021932 -6.33457931 3.6229593 -4.29785852 2.4271323
6.50368944 3.11400521 -2.10474759 -2.7370379 -1.24326915
-9.33573515 10.00896148 -4.64442439 -1.40697846 -1.0308081
-2.6851808 -1.39260628 4.2399263 1.30515791 9.58972213]
[ -4.65253395 -4.56866878 4.41349972 7.88223699 -7.51775694
5.32978152 0.44765337 -7.92145155 3.86751537 0.94237832
-9.32159783 8.06150206 5.93296409 7.07037184 -4.13101316
-1.56841113 -11.65892599 -10.66518394 -6.00710412 -11.42686344]]
In [11]:
# Distribute rows 1 through 9 of X as an RDD; row 0 and rows 10 onward are
# left out of the training data.
# NOTE(review): only 9 points feed the clustering -- confirm this slice is
# intentional rather than X[:10] or the whole of X.
data = sc.parallelize(X[1:10])
In [12]:
# Fit an MLlib k-means model with 5 clusters on `data`; every other setting is
# left at the KMeans.train default.
model = KMeans.train(data, k=5)
In [14]:
# Display the fitted cluster centers (one array of feature values per cluster).
model.centers
Out[14]:
[array([-9.52977885, -0.21949739, 7.40559298, -7.35162615, -8.60554592,
-3.11771962, -3.56486519, -8.50363237, -6.48018487, 7.52695109,
-3.85337117, 9.54175262, -9.03974626, -3.39330424, 4.9353669 ,
0.43595824, -2.38499406, -9.70736034, -2.65217937, 2.6921451 ]),
array([ -2.28491968, 4.78324522, 8.63483306, -4.35186232,
-4.60251651, -7.03618689, -8.32221764, 0.66676661,
7.53789095, -0.7162713 , -10.19716729, -11.58301531,
-9.63346717, 1.21774221, -3.09186347, -1.61226172,
-3.38826404, -5.90932636, -0.85335682, -8.50578432]),
array([ -5.90885199, -5.54495586, 3.82752788, 5.47445191,
-7.56901466, 6.17678723, -1.67866919, -8.18495808,
4.4351337 , 3.01243834, -9.64523205, 6.18695345,
6.18979228, 5.44298096, -6.51295969, -3.63470336,
-8.37506596, -8.80310855, -5.08267496, -10.22274691]),
array([ -1.01893618e+00, -4.69772631e+00, 3.19034668e+00,
-4.77147910e+00, 1.78000059e+00, 6.04453602e+00,
1.19143029e+00, -5.57071535e+00, -3.71489146e+00,
9.73624545e-01, -9.70951008e+00, 8.43654627e+00,
-5.30731287e+00, -2.07384086e+00, -5.77645726e-03,
-2.28189013e+00, -1.34796861e+00, 2.73544779e+00,
1.00295494e+00, 9.14007282e+00]),
array([-9.49650876, -1.0219336 , 5.35451856, -7.49414541, -9.12113299,
-3.84506743, -2.52210559, -8.34878782, -5.95092744, 8.55325433,
-6.49382995, 9.91160201, -8.59122399, -1.56711779, 4.7464535 ,
0.10183576, -2.61129868, -8.5574042 , -1.65643489, 3.69093277])]
In [ ]:
Content source: chrwu/docker-spark-hive-ipython
Similar notebooks: