In [1]:
# Make the parent directory importable so the local `optimus` package
# (one level up from this notebook) can be imported in a later cell.
import sys
sys.path.append("..")

In [2]:
from pyspark.sql import SparkSession

# Get — or create on first run — the SparkSession shared by the rest
# of this notebook.
_builder = SparkSession.builder.appName('abc')
spark = _builder.getOrCreate()

In [3]:
# Load the raw CSV; the first row supplies the column names. No schema
# inference is requested, so every column arrives as a string (visible
# in the `df.table()` output below).
df = (
    spark.read
    .option('header', True)
    .csv('data/foo.csv')
)

In [4]:
from optimus import Optimus
# Wrap the existing SparkSession with Optimus. This appears to also
# attach helper methods such as `.table()` to DataFrames (used in a
# later cell) — NOTE(review): confirm against the Optimus documentation.
op = Optimus(spark)


<optimus.spark.Spark object at 0x0000024CCA0D7208>

In [5]:
# Pretty-print the DataFrame. `.table()` is not a plain-PySpark method —
# presumably added by Optimus above; the rendered output reports
# 19 rows / 8 columns, all string-typed.
df.table()


Viewing 19 of 19 rows / 8 columns
1 partition(s)
id
1 (string)
nullable
firstName
2 (string)
nullable
lastName
3 (string)
nullable
billingId
4 (string)
nullable
product
5 (string)
nullable
price
6 (string)
nullable
birth
7 (string)
nullable
dummyCol
8 (string)
nullable
1
Luis
Alvarez$$%!
123
Cake
10
1980/07/07
never
2
André
Ampère
423
piza
8
1950/07/08
gonna
3
NiELS
Böhr//((%%
551
pizza
8
1990/07/09
give
4
PAUL
dirac$
521
pizza
8
1954/07/10
you
5
Albert
Einstein
634
pizza
8
1990/07/11
up
6
Galileo
⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱GALiLEI
672
arepa
5
1930/08/12
never
7
CaRL
Ga%%%uss
323
taco
3
1970/07/13
gonna
8
David
H$$$ilbert
624
taaaccoo
3
1950/07/14
let
9
Johannes
KEPLER
735
taco
3
1920/04/22
you
10
JaMES
M$$ax%%well
875
taco
3
1923/03/12
down
11
Isaac
Newton
992
pasta
9
1999/02/15
never⸱
12
Emmy%%
Nöether$
234
pasta
9
1993/12/08
gonna
13
Max!!!
Planck!!!
111
hamburguer
4
1994/01/04
run⸱
14
Fred
Hoy&&&le
553
pizzza
8
1997/06/27
around
15
(((⸱⸱⸱Heinrich⸱)))))
Hertz
116
pizza
8
1956/11/30
and
16
William
Gilbert###
886
BEER
2
1958/03/26
desert
17
Marie
CURIE
912
Rice
1
2000/03/22
you
18
Arthur
COM%%%pton
812
110790
5
1899/01/01
#
19
JAMES
Chadwick
467
null
10
1921/05/03
#
Viewing 19 of 19 rows / 8 columns
1 partition(s)