Basic RDD Operation Example


In [1]:
# Sample input for the RDD example: the integers 0 through 9.
# list(range(...)) builds the same list as the identity comprehension without
# leaving a stray loop variable in the notebook namespace on Python 2.
list_a = list(range(10))

In [2]:
# Distribute the list as an RDD, add 1 to every element, then sum the results.
# The parenthesised expression is the cell's last value, so it is displayed.
(sc.parallelize(list_a)
   .map(lambda value: value + 1)
   .reduce(lambda total, value: total + value))


Out[2]:
55

Spark SQL Example


In [3]:
from pyspark.sql import HiveContext, Row
# SQL entry point for the cells below, built on the existing SparkContext `sc`.
sqlContext = HiveContext(sc)

In [4]:
# Note: as the recorded output shows, this expression evaluates to a DataFrame
# and only its schema is echoed; call .show() on the result to print the rows.
sqlContext.sql("show tables")


Out[4]:
DataFrame[tableName: string, isTemporary: boolean]

In [5]:
# Sample product-revenue records, one Row per product. `rev` is kept as a
# string here. Note that `df` holds the parallelized collection of Rows; the
# actual DataFrame is `productRevenue_df`, created from it below.
df = sc.parallelize([
    Row(product="thin", category="cellphone", rev="6000"),
    Row(product="normal", category="tablet", rev="1500"),
    Row(product="mini", category="tablet", rev="5500"),
    Row(product="ultra thin", category="cellphone", rev="5000"),
    Row(product="very thing", category="cellphone", rev="6000"),
    Row(product="big", category="tablet", rev="2500"),
    Row(product="bendable", category="cellphone", rev="3000"),
    Row(product="foldable", category="cellphone", rev="3000"),
    Row(product="pro", category="tablet", rev="4500"),
    Row(product="pro2", category="tablet", rev="6500"),
])
productRevenue_df = sqlContext.createDataFrame(df)
# Print the DataFrame contents as a text table.
productRevenue_df.show()


+---------+----------+----+
| category|   product| rev|
+---------+----------+----+
|cellphone|      thin|6000|
|   tablet|    normal|1500|
|   tablet|      mini|5500|
|cellphone|ultra thin|5000|
|cellphone|very thing|6000|
|   tablet|       big|2500|
|cellphone|  bendable|3000|
|cellphone|  foldable|3000|
|   tablet|       pro|4500|
|   tablet|      pro2|6500|
+---------+----------+----+

Shell Script Example


In [16]:
# IPython shell escape: print the current working directory.
!pwd


/home

In [17]:
# IPython shell escape: list the contents of the current working directory.
!ls


Untitled.ipynb	derby.log  metastore_db

Spark MLlib Example


In [8]:
from sklearn.datasets import make_blobs
from pyspark.mllib.clustering import KMeans

In [9]:
# Generate 50 synthetic points with 20 features each, drawn around 5 centers.
# Keyword arguments make the meaning of each number explicit and keep the call
# valid on scikit-learn releases where `centers` is keyword-only.
# NOTE: no random_state is passed, so the generated values differ on every run.
X, y = make_blobs(n_samples=50, n_features=20, centers=5)

In [10]:
# Function-call form prints the same text on Python 2 and also runs on
# Python 3, where the bare `print X` statement is a SyntaxError.
print(X)


[[  8.79265169   8.69813553   7.26584523  -0.95237571   7.66482599
   -1.84022236   4.62125024   2.5017508  -10.41165513   2.09050148
    3.17796425   7.85348358  10.24431579  -1.88136637   4.73685396
   -4.22206454   1.3553198    8.96417314   0.96634182  -0.1082746 ]
 [ -0.57021603  -2.95011861   2.8014989   -3.77173248   2.4647076
    5.81325698   1.42435662  -7.04384883  -3.05966505   1.6740405
   -9.84026239   8.24840516  -5.1131863   -2.22311618   0.95482552
   -3.38819958  -2.10016993   2.59753474   1.76189002   9.40326368]
 [ -2.28491968   4.78324522   8.63483306  -4.35186232  -4.60251651
   -7.03618689  -8.32221764   0.66676661   7.53789095  -0.7162713
  -10.19716729 -11.58301531  -9.63346717   1.21774221  -3.09186347
   -1.61226172  -3.38826404  -5.90932636  -0.85335682  -8.50578432]
 [ -1.46765634  -6.44533401   3.57919445  -5.77122572   1.09529359
    6.27581505   0.95850395  -4.09758186  -4.37011787   0.27320859
   -9.57875777   8.62468737  -5.50143944  -1.92456555  -0.96637844
   -1.17558067  -0.5957673    2.87336084   0.24401986   8.87688196]
 [ -9.49650876  -1.0219336    5.35451856  -7.49414541  -9.12113299
   -3.84506743  -2.52210559  -8.34878782  -5.95092744   8.55325433
   -6.49382995   9.91160201  -8.59122399  -1.56711779   4.7464535
    0.10183576  -2.61129868  -8.5574042   -1.65643489   3.69093277]
 [ -9.16767611  -0.60074617   8.46580961  -7.68791995  -9.51274125
   -4.04907457  -3.95295082  -8.44538501  -5.80830824   7.51824738
   -4.31701729  10.25830408  -9.66835164  -2.20636504   4.80645569
   -0.28075858  -2.46094482  -9.83542093  -3.39974554   2.6094632 ]
 [ -5.90885199  -5.54495586   3.82752788   5.47445191  -7.56901466
    6.17678723  -1.67866919  -8.18495808   4.4351337    3.01243834
   -9.64523205   6.18695345   6.18979228   5.44298096  -6.51295969
   -3.63470336  -8.37506596  -8.80310855  -5.08267496 -10.22274691]
 [-10.09062298  -0.35889548   8.32643481  -6.77082798  -8.61285419
   -2.11910124  -3.17342421  -6.94638579  -5.53397978   7.97891402
   -3.80749304   8.75537255  -9.07804589  -4.44827072   5.16998281
   -0.23544812  -3.15845139 -10.1293545   -3.40367996   2.75186944]
 [-10.41115107  -0.07056801   6.86882724  -8.71377695  -8.32764693
   -3.4462044   -3.98585553  -8.88198806  -5.91159088   7.21272186
   -3.70045788  10.61664859  -8.02765378  -3.49693279   4.83534002
    1.23359632  -2.74023638  -9.27244076  -2.15486888   3.4196431 ]
 [ -8.44966524   0.15222009   5.96130028  -6.23397972  -7.9689413
   -2.85649827  -3.14723019  -9.74077064  -8.66686057   7.3979211
   -3.58851647   8.53668524  -9.38493373  -3.42164842   4.92968908
    1.02644333  -1.18034366  -9.59222518  -1.65042312   1.98760466]
 [ -1.26695207  -5.1749955    2.25787902  -5.70478069   2.88745823
    5.13921971   1.58049331  -2.88741482  -2.19152852  -0.57132348
   -8.52296615  10.57857894  -4.72710729  -4.52792417  -0.93353762
   -1.67313032  -3.61894688   3.93832621   0.97728128   9.79862303]
 [ -1.73310782   3.12935171   8.869709    -2.16530809  -5.27978313
   -8.22934144  -7.05111819   3.08486652   6.54123198  -1.14497731
   -9.58165071  -8.69839638  -8.93336014   4.45107776  -1.79777995
   -4.88620719  -3.80454933  -5.04569698  -3.04731689  -8.22577033]
 [ -2.09816579  -3.94974648   3.84203159  -4.67855983   1.66200019
    5.62767407   0.98819188  -3.35296529  -5.24095731   1.23629149
   -6.2691853    9.91397356  -4.09764593  -1.68827378   2.09885207
   -1.13690589  -3.42814462   3.34322359   0.59152802   9.48704993]
 [ -5.71423354  -4.90467266   5.23284865   6.56889188  -7.31557488
    3.77010156   1.62994422  -8.20567917   3.71870688   3.33084662
   -8.82169167   6.38913688   6.5786957    8.47159069  -4.07719953
   -2.44813625  -8.95443938  -9.83878637  -4.08999396  -7.91651951]
 [ -5.13309635  -5.61619555   4.56368682   5.69148863  -6.62759493
    4.81955263   0.49868692  -7.71699533   5.03595666   3.37882384
  -10.88285834   7.28788877   7.34980894   7.5980248   -3.53495948
   -0.94232066  -9.42382332  -8.04659729  -4.38500482  -9.30833585]
 [ -8.89091579   0.0235643    6.34667684  -9.5793069   -8.87082747
   -2.40385377  -1.69359962  -6.92598514  -7.17032059   7.58525584
   -3.59895184  10.49636941 -10.05270864  -2.02568144   5.48475718
    3.71408799  -1.96145255  -7.50221358  -1.528048     3.93092539]
 [-11.01809206   0.65005831   6.14247758  -7.78617531  -8.85514435
   -2.7158194   -2.93945877  -9.03006019  -9.42412677   5.42537483
   -5.38538841  10.42461165  -9.02806649  -3.12767159   4.89396746
    1.58902233  -2.07727558 -10.35575586  -1.4446981    2.77487666]
 [ -0.74485868  -6.18602552   3.16073056  -5.98478001   2.03001923
    5.41605093   2.78935152  -5.26648786  -3.27973435  -0.75328004
  -10.11021545   8.59663771  -4.33138411  -0.47283699   0.46859837
   -3.64985693  -1.84531066   4.7089873    3.06787167   6.90065742]
 [ -3.06584439   4.53349698  10.1336167   -3.74905318  -6.95655664
   -9.81691591  -7.2474322    1.42385697   9.08973736   0.77069211
  -10.19619408  -9.07902001  -9.55303383   2.67772765  -1.12306886
   -3.06078111  -3.54054204  -5.29597035  -0.92304292  -9.83068289]
 [ -2.05880127   2.99862419   7.57745671  -3.70846263  -5.84651158
   -8.50470297  -8.05605841   1.52883426   5.36114513  -1.41274885
   -9.19053076  -7.95406859  -8.18249091   4.64107126  -2.57381654
   -6.32837147  -4.20825263  -4.93159923  -0.61012825 -10.25227777]
 [ -3.22909228  -6.05472756   2.24222264   7.14861504  -6.90604733
    7.02793092   0.64583093 -10.41601241   6.74792737   3.19870211
   -9.37468184   7.1796093    7.76001188   8.42036659  -4.67752832
   -2.72478464 -11.57321487  -9.90822188  -4.63559988  -9.14452738]
 [ -0.32848453   3.21469104   8.30582211  -3.40132156  -5.61302158
   -8.28486755  -6.43325172   2.7060666    8.84514569  -0.78917256
  -10.17674458  -9.10960257  -7.3373645    3.53777927  -0.21122767
   -4.66223103  -4.3170636   -6.74353318  -2.01765468  -9.05915   ]
 [  8.87659059  10.02315524   7.63348384  -0.78999537   6.15004821
   -0.80182809   3.69219647   4.51633778 -11.05425249   0.65739829
    3.29974463   7.66390667   8.03632908  -2.13364756   7.30530683
   -3.06916852   2.77256792   9.55910139   2.95371809   0.17630935]
 [ -1.55958542  -5.58975913   3.16365453  -7.36712016   1.24880887
    7.17323695   1.15015381  -5.6185459   -4.6739325   -0.75362846
   -8.38835195   9.18339151  -3.20357234  -3.57954536   0.89379494
   -2.88340696  -1.11358099   4.07314025   0.92948887  10.99266101]
 [ -8.9643237   -0.75453883   8.50413004  -7.02518689  -9.98634559
   -0.82849811  -4.53274002  -9.09084082  -6.6357981    6.83042158
   -4.42255934   9.00979303  -9.3140614   -4.14881835   5.6429822
    0.09036695  -1.23564314  -9.11116981  -2.82218242   3.26987051]
 [  6.12513995   8.99674188   9.65724553  -1.21327036   5.97868271
    0.58261886   3.23666666   3.45474845 -10.39400999   1.72747319
    1.04407464   8.52157082   8.3749836   -1.49563235   5.21648981
   -5.11484133   1.59931938   9.36096256   2.02000206  -0.25519271]
 [  0.42800271  -4.75157392   0.67068223  -6.63799638   2.30012038
    5.88094233   0.17829208  -4.89444755  -3.98047312   0.14807241
   -8.49625553   9.03629778  -4.19923372  -2.32524137   0.24132932
    1.32375501  -3.56663785   3.5635277    0.47411429   8.68253044]
 [ -1.97170984   4.73907768   8.5960669   -2.47929819  -5.67235034
   -8.39811441  -5.6700071    2.95260447   6.45600225  -0.64717907
   -7.93264074  -9.06398649  -7.22674135   2.16432398  -2.51049852
   -5.49224367  -3.72170812  -5.5418101   -3.11545286  -9.43508282]
 [  5.5404227   10.63362173   6.15231789  -0.07974176   5.93787708
   -0.73781044   5.14992505   3.48997197  -8.93728186   2.32245306
    3.69498089   8.35523137  10.30829502  -2.70288028   7.24147297
   -3.36114892   1.9437317    9.31858424   0.94674363  -0.44561825]
 [-10.63279579  -0.66113613   5.69772772  -8.40089973  -7.59638738
   -2.373329    -5.18061728  -8.07305814  -7.20407407   7.14676877
   -3.29742097   8.01483303  -8.96479687  -2.44997393   6.16384879
    2.30791879  -2.31465654  -9.68337043  -2.88746672   4.2631566 ]
 [ -3.69508504   3.82906398   9.07353261  -3.33653134  -6.01945723
   -8.65407873  -6.55893289   1.23018717   7.34892996   0.0579159
   -9.03702647  -7.44114238  -9.73628371   2.89247218  -0.89702567
   -3.58736906  -3.86314433  -5.30858454   0.7791437   -9.24063714]
 [  8.08644222   8.87941365   9.55340897  -1.30607678   4.9260196
   -3.6240853    5.15678805   2.82427402  -9.32367323   1.47574584
    2.14141527   8.08307953   9.40738853  -3.88530459   5.98481482
   -4.53187247   0.45451155  10.25104408   0.25276069  -1.06347681]
 [ -2.74628449   3.31815634   8.14577466  -2.06380547  -7.28394
   -8.83086112  -7.16667963   3.31967965   6.82192358   0.16597437
  -11.24025004  -9.41793648  -9.04037246   4.14180995   0.84967642
   -6.27998076  -2.75612904  -6.74960893   0.89383426 -10.0398925 ]
 [ -5.42344655  -5.32197792   4.24357795   6.08716412  -8.04891978
    7.03299931  -0.16837288  -8.42380817   5.71020325   3.93160216
   -8.68375605   7.9532002    6.29923985   6.1721293   -2.40228886
   -1.17646738  -9.20062379  -9.05222142  -4.14841751  -9.44238908]
 [ -9.69575602  -1.26165659   7.17574298  -7.44571536  -9.20489898
   -1.05552674  -2.24857561  -5.77235264  -4.90003167   7.35809601
   -4.37268835   7.73192904  -9.91806323  -0.95139636   5.61510012
    0.04955683  -2.37936598  -9.32269386  -1.228146     4.60322034]
 [ -7.06918785  -4.53619792   3.49272374   7.30552997  -8.42555883
    7.61654036  -0.41245482  -8.375508     5.04293329   3.47474554
   -9.71920488   6.42720035   5.97988831   7.16019699  -1.76321135
   -1.68806574  -8.29167598 -10.0597838   -5.23408344  -9.84310148]
 [ -2.05858916  -5.93886336   3.59727058  -5.48880491   1.75878514
    6.3795424   -0.16205924  -5.66474524  -4.29749796   1.45742668
   -9.18978875   8.31342714  -4.39611176  -2.36679532  -0.17316643
   -1.29472262  -2.03622453   3.16314402  -0.03492922   9.23281076]
 [  7.92722089   8.04099126   5.82785243   0.20723055   5.22372309
   -1.29561513   4.43317771   1.40867907  -8.20111531   1.80932808
    1.89302643  10.23653117   9.35520272  -3.79136589   6.94000326
   -5.19072538   0.82532701   9.37551211   2.82581378  -0.31815209]
 [  7.28484896   9.83327378   7.97406688  -2.15559887   6.72369761
   -2.07408644   4.64012786   2.81919429  -9.79647173   1.282567
    2.67484452   9.44119935   8.62662785  -2.42891041   7.10834997
   -4.36657481   4.29044979   8.4482797    3.54488407   1.36132838]
 [ -2.98063191   4.074572     8.98369123  -5.03446163  -6.15695148
   -9.32677563  -7.07704334   3.45671378   7.75018594  -0.24121346
  -12.86203222 -11.01295     -7.74739849   3.5447667   -0.40269482
   -5.35323143  -4.33959903  -5.31024094   0.93685153 -10.66204779]
 [ -5.32426187  -5.79372405   4.39744709   5.20303173  -8.83290436
    5.01711329  -0.56582087 -10.00838395   3.94702946   1.99261785
  -10.42930071   6.23734522   7.1082643    6.75586154  -4.02747233
   -1.89817641  -9.73914688 -10.88380192  -6.32326931  -8.66369578]
 [ -0.80088647  -6.59404667   4.96584639  -6.5966571    2.4049069
    4.51482955   3.66068208  -3.98320439  -4.61739624  -0.913527    -7.262703
    7.93022824  -3.92490351  -3.82614015   0.231209     0.22197534
   -2.53968922   3.95178895   2.23285485   8.38337081]
 [ -5.20347485  -4.76200642   4.78065089   7.80650627  -6.99190434
    6.60218947  -0.75750626  -9.93655922   3.55925751   0.18589963
  -10.99735713   5.62395171   9.54326579   7.56091331  -5.92309349
   -2.0497414   -8.24049052 -10.24857496  -4.82350147  -8.9755812 ]
 [  6.35284627   9.36685896   7.14045089  -2.95191837   4.50853949
   -3.85719013   4.67085388   2.95068103  -7.33265801   0.41501278
    2.21337601   9.89184692  10.20355969  -2.09113752   5.75919196
   -3.96184032   0.56571767   8.68900251   0.8324208   -0.94634905]
 [  7.00613987   9.63080174   9.39265305  -0.99138488   6.53977301
   -1.86539383   5.08117662   1.57432817  -9.66341266   1.57295707
    3.40884724   9.07733361   7.79035121  -0.32150351   6.94189509
   -3.94901528   2.36947411  11.08678972   2.27115378   1.08629576]
 [ -3.67115226   2.94766368   8.70206218  -4.21991573  -6.60319089
   -7.05401522  -7.47685667   2.03894497   8.76000929  -1.76306672
  -10.59755549  -8.02661475  -6.56361209   0.81500712  -2.06300179
   -5.29620904  -2.62132026  -3.72818738   0.13529043  -8.75662355]
 [ -4.4385462   -5.86517207   4.22034216   6.81720356  -6.3163222
    7.47084369   0.69087844  -9.51944839   5.68193768   3.74646092
  -10.13513082   6.69211503   6.30429168   7.98231544  -4.01880065
   -1.54689604  -8.92889236 -10.49976794  -5.70977596 -10.91617866]
 [  7.40529978   9.21222707   6.76048215  -2.18044132   5.33469714
   -2.35195585   5.1215056    3.6254503  -10.23488585   2.88027529
    2.76962641  10.17128967   8.91038073  -1.99085837   7.11521375
   -5.0265937    0.78647388  10.48042086   0.65197898  -1.77420073]
 [  0.10021932  -6.33457931   3.6229593   -4.29785852   2.4271323
    6.50368944   3.11400521  -2.10474759  -2.7370379   -1.24326915
   -9.33573515  10.00896148  -4.64442439  -1.40697846  -1.0308081
   -2.6851808   -1.39260628   4.2399263    1.30515791   9.58972213]
 [ -4.65253395  -4.56866878   4.41349972   7.88223699  -7.51775694
    5.32978152   0.44765337  -7.92145155   3.86751537   0.94237832
   -9.32159783   8.06150206   5.93296409   7.07037184  -4.13101316
   -1.56841113 -11.65892599 -10.66518394  -6.00710412 -11.42686344]]

In [11]:
# Distribute a small slice of the samples as an RDD for MLlib.
# NOTE(review): X[1:10] selects rows 1 through 9 only (row 0 is skipped), i.e.
# 9 of the 50 generated samples -- confirm this subset is intended.
data = sc.parallelize(X[1:10])

In [12]:
# Fit a k-means model with 5 clusters on the distributed points in `data`.
model = KMeans.train(data, k=5)

In [14]:
# Display the fitted cluster centers (one array per cluster) through the
# documented `clusterCenters` property rather than the underlying attribute.
model.clusterCenters


Out[14]:
[array([-9.52977885, -0.21949739,  7.40559298, -7.35162615, -8.60554592,
        -3.11771962, -3.56486519, -8.50363237, -6.48018487,  7.52695109,
        -3.85337117,  9.54175262, -9.03974626, -3.39330424,  4.9353669 ,
         0.43595824, -2.38499406, -9.70736034, -2.65217937,  2.6921451 ]),
 array([ -2.28491968,   4.78324522,   8.63483306,  -4.35186232,
         -4.60251651,  -7.03618689,  -8.32221764,   0.66676661,
          7.53789095,  -0.7162713 , -10.19716729, -11.58301531,
         -9.63346717,   1.21774221,  -3.09186347,  -1.61226172,
         -3.38826404,  -5.90932636,  -0.85335682,  -8.50578432]),
 array([ -5.90885199,  -5.54495586,   3.82752788,   5.47445191,
         -7.56901466,   6.17678723,  -1.67866919,  -8.18495808,
          4.4351337 ,   3.01243834,  -9.64523205,   6.18695345,
          6.18979228,   5.44298096,  -6.51295969,  -3.63470336,
         -8.37506596,  -8.80310855,  -5.08267496, -10.22274691]),
 array([ -1.01893618e+00,  -4.69772631e+00,   3.19034668e+00,
         -4.77147910e+00,   1.78000059e+00,   6.04453602e+00,
          1.19143029e+00,  -5.57071535e+00,  -3.71489146e+00,
          9.73624545e-01,  -9.70951008e+00,   8.43654627e+00,
         -5.30731287e+00,  -2.07384086e+00,  -5.77645726e-03,
         -2.28189013e+00,  -1.34796861e+00,   2.73544779e+00,
          1.00295494e+00,   9.14007282e+00]),
 array([-9.49650876, -1.0219336 ,  5.35451856, -7.49414541, -9.12113299,
        -3.84506743, -2.52210559, -8.34878782, -5.95092744,  8.55325433,
        -6.49382995,  9.91160201, -8.59122399, -1.56711779,  4.7464535 ,
         0.10183576, -2.61129868, -8.5574042 , -1.65643489,  3.69093277])]

In [ ]: