In [45]:
    
import sys
from random import random
from operator import add
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("PythonPi").getOrCreate()

#partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2
partitions = 10
n = 100000 * partitions

def f(_):
    # Sample a point uniformly from the 2x2 square centered at the origin;
    # count it if it lands inside the unit circle.
    x = random() * 2 - 1
    y = random() * 2 - 1
    return 1 if x ** 2 + y ** 2 <= 1 else 0

# The fraction of sampled points inside the circle approximates pi/4
count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
print("Pi is roughly %f" % (4.0 * count / n))

spark.stop()
    
    
In [46]:
    
sourceBytes = '''
import sys
from random import random
from operator import add

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("PythonPi").getOrCreate()

partitions = 10
n = 100000 * partitions

def f(_):
    x = random() * 2 - 1
    y = random() * 2 - 1
    return 1 if x ** 2 + y ** 2 <= 1 else 0

count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add)
print("Pi is roughly %f" % (4.0 * count / n))

spark.stop()
'''.encode('utf-8')
    
In [47]:
    
print(sourceBytes.decode("utf-8"))
    
    
In [54]:
    
# This can be removed once the Dockerfile for jupyterhub is fixed
!cd ~ && rm scripts && ln -s /root/pipeline/jupyterhub.ml/scripts
    
In [55]:
    
!ls ~/scripts/pi
    
    
In [56]:
    
!mkdir -p ~/scripts/pi/

# Write the embedded PySpark script to a file tracked by git
with open('/root/scripts/pi/pi.py', 'wb') as f:
    f.write(sourceBytes)

!cat ~/scripts/pi/pi.py
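
The generated script can optionally be smoke-tested before committing it. This is a minimal sanity check, assuming `spark-submit` is on the PATH inside this notebook container (an assumption, not something shown elsewhere in this notebook):
In [ ]:
    
# Optional: run the generated script locally before committing
# (assumes spark-submit is available in this container)
!spark-submit --master local[2] ~/scripts/pi/pi.py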
    
    
In [57]:
    
!git status
    
    
In [59]:
    
!git add --all ~/scripts
    
In [60]:
    
!git status
    
    
In [19]:
    
!git commit -m "updated pyspark scripts"
    
    
In [20]:
    
!git status
    
    
In [96]:
    
# If this fails with "Permission denied", use a terminal within Jupyter to run `git push` manually
!git push
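
If the push keeps failing, the container may simply lack credentials for the HTTPS remote. A hedged alternative to the terminal workaround above, assuming an SSH key is already registered with GitHub (the repository URL below is a placeholder, not the real one):
In [ ]:
    
# Inspect the configured remote; optionally switch it to SSH (placeholder URL)
!git remote -v
# !git remote set-url origin git@github.com:<your-org>/<your-repo>.git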
    
    
Airflow Workflow Deploys New PySpark Job through GitHub Post-Commit Webhook Trigger
In [21]:
    
from IPython.core.display import display, HTML

# Widen the notebook container and embed the Airflow admin UI
display(HTML("<style>.container { width:100% !important; }</style>"))

html = '<iframe width="100%" height="500px" src="http://demo.pipeline.io:8080/admin"></iframe>'
display(HTML(html))
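
For reference, a minimal sketch of what the triggered DAG might look like. The DAG id, file paths, and tasks below are assumptions for illustration, not the actual pipeline.io definition; the post-commit webhook is assumed to trigger the DAG externally, so it carries no schedule of its own (Airflow 1.x import paths shown).
In [ ]:
    
from datetime import datetime

from airflow import DAG
from airflow.operators.bash_operator import BashOperator

default_args = {
    'owner': 'airflow',
    'start_date': datetime(2017, 1, 1),
}

# Hypothetical DAG id; triggered by the GitHub post-commit webhook rather than a schedule
dag = DAG('deploy_pyspark_pi', default_args=default_args, schedule_interval=None)

# Pull the latest scripts committed above (repository path is an assumption)
pull_scripts = BashOperator(
    task_id='git_pull_scripts',
    bash_command='cd /root/pipeline && git pull',
    dag=dag)

# Submit the updated PySpark Pi job
run_pi = BashOperator(
    task_id='spark_submit_pi',
    bash_command='spark-submit /root/scripts/pi/pi.py',
    dag=dag)

pull_scripts >> run_pi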
    
    
    
In [ ]: