The SparkContext.addPyFile() method can be used to ship a .py file to the cluster. Objects and variables defined in that file then become importable on every Spark executor.
In [1]:
from pyspark import SparkConf, SparkContext, SparkFiles
from pyspark.sql import SparkSession
In [2]:
sc = SparkContext(conf=SparkConf())
In [3]:
sc.addPyFile('pyFiles/my_module.py')
In [4]:
SparkFiles.get('my_module.py')
Out[4]:
In [5]:
from my_module import *
In [6]:
addPyFiles_is_successfull()
Out[6]:
In [7]:
sum_two_variables(4,5)
Out[7]: