In [1]:
# Make the parent directory importable so the local `optimus` package
# (one level up from this notebook) can be imported in a later cell.
import sys
sys.path.append("..")

In [2]:
from pyspark.sql import SparkSession

# Get — or create on first run — the SparkSession shared by the rest
# of this notebook.
_builder = SparkSession.builder.appName('abc')
spark = _builder.getOrCreate()

In [3]:
# Load the raw CSV; the first row supplies the column names. No schema
# inference is requested, so every column arrives as a string (visible
# in the `df.table()` output below).
df = (
    spark.read
    .option('header', True)
    .csv('data/foo.csv')
)

In [4]:
from optimus import Optimus
# Wrap the existing SparkSession with Optimus. This appears to also
# attach helper methods such as `.table()` to DataFrames (used in a
# later cell) — NOTE(review): confirm against the Optimus documentation.
op = Optimus(spark)


<optimus.spark.Spark object at 0x0000024CCA0D7208>

In [5]:
# Pretty-print the DataFrame. `.table()` is not a plain-PySpark method —
# presumably added by Optimus above; the rendered output reports
# 19 rows / 8 columns, all string-typed.
df.table()


Viewing 19 of 19 rows / 8 columns
1 partition(s)
id
1 (string)
nullable
firstName
2 (string)
nullable
lastName
3 (string)
nullable
billingId
4 (string)
nullable
product
5 (string)
nullable
price
6 (string)
nullable
birth
7 (string)
nullable
dummyCol
8 (string)
nullable
1
Luis
Alvarez$$%!
123
Cake
10
1980/07/07
never
2
André
Ampère
423
piza
8
1950/07/08
gonna
3
NiELS
Böhr//((%%
551
pizza
8
1990/07/09
give
4
PAUL
dirac$
521
pizza
8
1954/07/10
you
5
Albert
Einstein
634
pizza
8
1990/07/11
up
6
Galileo
⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱GALiLEI
672
arepa
5
1930/08/12
never
7
CaRL
Ga%%%uss
323
taco
3
1970/07/13
gonna
8
David
H$$$ilbert
624
taaaccoo
3
1950/07/14
let
9
Johannes
KEPLER
735
taco
3
1920/04/22
you
10
JaMES
M$$ax%%well
875
taco
3
1923/03/12
down
11
Isaac
Newton
992
pasta
9
1999/02/15
never⸱
12
Emmy%%
Nöether$
234
pasta
9
1993/12/08
gonna
13
Max!!!
Planck!!!
111
hamburguer
4
1994/01/04
run⸱
14
Fred
Hoy&&&le
553
pizzza
8
1997/06/27
around
15
(((⸱⸱⸱Heinrich⸱)))))
Hertz
116
pizza
8
1956/11/30
and
16
William
Gilbert###
886
BEER
2
1958/03/26
desert
17
Marie
CURIE
912
Rice
1
2000/03/22
you
18
Arthur
COM%%%pton
812
110790
5
1899/01/01
#
19
JAMES
Chadwick
467
null
10
1921/05/03
#
Viewing 19 of 19 rows / 8 columns
1 partition(s)