In [1]:
# Create the Spark entry points: a SparkContext (`sc`) and a SparkSession
# (`spark`) built on top of it.
#
# If a previous run of this cell (or the kernel itself) left a context bound
# to `sc`, stop it first before constructing a new one below.
try:
    sc.stop()
except NameError:
    # `sc` is not defined yet (e.g. fresh kernel), so there is nothing to stop.
    # Only this expected case is ignored; any other failure is surfaced.
    pass
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
sc = SparkContext()
spark = SparkSession(sparkContext=sc)
In [3]:
# Read the mtcars CSV with a header row and inferred column types, then
# rename the '_c0' column to 'model' in the same expression.
mtcars = (
    spark.read
    .csv('../../data/mtcars.csv', header=True, inferSchema=True)
    .withColumnRenamed('_c0', 'model')
)
mtcars.show()
In [4]:
# Column.between: boolean column expression testing `cyl` against the
# bounds 4 and 6.
# NOTE: despite the variable name, the column tested is `cyl`, not `mpg`.
mpg_between = mtcars['cyl'].between(lowerBound=4, upperBound=6)
mpg_between
Out[4]:
In [5]:
# Show `cyl` next to the result of the between-expression (first 5 rows).
mtcars.select('cyl', mpg_between).show(n=5)
In [6]:
# Column.contains: boolean column expression testing `model` for the
# substring 'Ho'.
model_contains = mtcars['model'].contains('Ho')
model_contains
Out[6]:
In [7]:
# Show `model` next to the result of the contains-expression (first 5 rows).
mtcars.select('model', model_contains).show(n=5)
In [8]:
# Column.endswith: boolean column expression testing whether `model`
# ends with 't'.
model_endswith = mtcars['model'].endswith('t')
model_endswith
Out[8]:
In [9]:
# Show `model` next to the result of the endswith-expression (first 6 rows).
mtcars.select('model', model_endswith).show(n=6)
In [10]:
from pyspark.sql import Row

# Two-row DataFrame with one missing `height`, used by the null-check
# cells that follow.
df = spark.createDataFrame(
    data=[
        Row(name='Tom', height=80),
        Row(name='Alice', height=None),
    ]
)
df.show()
In [11]:
# Column.isNotNull: boolean column expression testing whether `height`
# has a value.
height_isNotNull = df['height'].isNotNull()
height_isNotNull
Out[11]:
In [12]:
# Show `height` next to the result of the isNotNull-expression.
df.select('height', height_isNotNull).show()
In [13]:
# Column.isNull: boolean column expression testing whether `height`
# is missing.
height_isNull = df['height'].isNull()
height_isNull
Out[13]:
In [14]:
# Show `height` next to the result of the isNull-expression.
df.select('height', height_isNull).show()
In [15]:
# Column.isin: boolean column expression testing whether `carb` is one of
# the listed values (2 or 3).
carb_isin = mtcars['carb'].isin(2, 3)
carb_isin
Out[15]:
In [16]:
# Show `carb` next to the result of the isin-expression (first 10 rows).
mtcars.select('carb', carb_isin).show(n=10)
In [17]:
# Column.like: boolean column expression matching `model` against the
# SQL LIKE pattern 'Ho%'.
model_like = mtcars['model'].like('Ho%')
model_like
Out[17]:
In [18]:
# Show `model` next to the result of the like-expression (first 10 rows).
mtcars.select('model', model_like).show(n=10)
In [19]:
# Column.rlike: boolean column expression matching `model` against the
# regular expression 't$'.
model_rlike = mtcars['model'].rlike('t$')
model_rlike
Out[19]:
In [20]:
# Show `model` next to the result of the rlike-expression.
mtcars.select('model', model_rlike).show()
In [21]:
# Column.startswith: boolean column expression testing whether `model`
# starts with 'Merc'.
model_startswith = mtcars['model'].startswith('Merc')
model_startswith
Out[21]:
In [22]:
# Show `model` next to the result of the startswith-expression.
mtcars.select('model', model_startswith).show()
In [ ]: