In [1]:
# UNCOMMENT THIS IF YOU'RE USING GOOGLE COLAB!
#!apt-get install openjdk-8-jdk-headless -qq > /dev/null
#!wget -q http://apache.osuosl.org/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz
#!tar xf spark-2.4.1-bin-hadoop2.7.tgz
#!pip install optimuspyspark
# AFTER RUNNING THIS CELL, YOU MUST RESTART THE RUNTIME TO USE UPDATED VERSIONS OF PACKAGES!
In [2]:
# UNCOMMENT THIS IF YOU'RE USING GOOGLE COLAB!
#import os
#os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
#os.environ["SPARK_HOME"] = "/content/spark-2.3.3-bin-hadoop2.7"
In [3]:
# Run this cell only if you cloned the Optimus repo and want to hack on it from the notebook; otherwise install it with pip
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")
In [4]:
from optimus import Optimus
C:\Users\argenisleon\Anaconda3\lib\site-packages\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working
from collections import Callable
You are using PySparkling of version 2.4.10, but your PySpark is of
version 2.3.1. Please make sure Spark and PySparkling versions are compatible.
In [5]:
# Create optimus
op = Optimus(master="local", app_name="optimus", verbose=True)
INFO:optimus:Just check that Spark and all necessary environments vars are present...
INFO:optimus:-----
INFO:optimus:SPARK_HOME=C:\opt\spark\spark-2.3.1-bin-hadoop2.7
INFO:optimus:HADOOP_HOME=C:\opt\hadoop-2.7.7
INFO:optimus:PYSPARK_PYTHON=C:\Users\argenisleon\Anaconda3\python.exe
INFO:optimus:PYSPARK_DRIVER_PYTHON=jupyter
INFO:optimus:PYSPARK_SUBMIT_ARGS=--packages com.databricks:spark-avro_2.11:4.0.0 --conf "spark.sql.catalogImplementation=hive" pyspark-shell
INFO:optimus:JAVA_HOME=C:\java
INFO:optimus:Pyarrow Installed
INFO:optimus:-----
INFO:optimus:Starting or getting SparkSession and SparkContext...
INFO:optimus:Spark Version:2.3.1
INFO:optimus:
____ __ _
/ __ \____ / /_(_)___ ___ __ _______
/ / / / __ \/ __/ / __ `__ \/ / / / ___/
/ /_/ / /_/ / /_/ / / / / / / /_/ (__ )
\____/ .___/\__/_/_/ /_/ /_/\__,_/____/
/_/
INFO:optimus:Transform and Roll out...
INFO:optimus:Optimus successfully imported. Have fun :).
In [ ]:
op = Optimus(packages="datastax:spark-cassandra-connector:1.6.1-s_2.10")
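The packages argument passes Maven coordinates to Spark so the listed dependency (here the Cassandra connector) is fetched and put on the classpath when the session starts. For reference, a minimal sketch of the plain-PySpark equivalent, assuming you build the SparkSession yourself instead of letting Optimus do it:
In [ ]:
# Plain-PySpark equivalent of Optimus' `packages` argument (sketch).
from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .master("local")
         .appName("optimus")
         .config("spark.jars.packages", "datastax:spark-cassandra-connector:1.6.1-s_2.10")
         .getOrCreate())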
In [7]:
op.spark
Out[7]:
SparkSession - hive
In [8]:
op.sc
Out[8]:
In [7]:
op.packages
Out[7]:
['com.databricks:spark-avro_2.11:4.0.0']
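op.spark and op.sc expose the underlying SparkSession and SparkContext, so the regular PySpark API stays available alongside Optimus. A trivial sketch:
In [ ]:
# Use the wrapped SparkSession directly for plain Spark work.
op.spark.range(5).show()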
In [6]:
from pyspark.sql.types import StructType, StructField, StringType, BooleanType, IntegerType, ArrayType
from datetime import date, datetime
df = op.create.df(
    [
        "names",
        "height(ft)",
        "function",
        "rank",
        "age",
        "weight(t)",
        "japanese name",
        "last position seen",
        "date arrival",
        "last date seen",
        "attributes",
        "DateType",
        "Tiemstamp",
        "Cybertronian",
        "function(binary)",
        "NullType",
    ],
    [
        ("Optim'us", 28, "Leader", 10, 5000000, 4.30, ["Inochi", "Convoy"], "19.442735,-99.201111", "1980/04/10",
         "2016/09/10", [8.5344, 4300.0], date(2016, 9, 10), datetime(2014, 6, 24), True, bytearray("Leader", "utf-8"),
         None),
        ("bumbl#ebéé ", 17, "Espionage", 7, 5000000, 2.0, ["Bumble", "Goldback"], "10.642707,-71.612534", "1980/04/10",
         "2015/08/10", [5.334, 2000.0], date(2015, 8, 10), datetime(2014, 6, 24), True, bytearray("Espionage", "utf-8"),
         None),
        ("ironhide&", 26, "Security", 7, 5000000, 4.0, ["Roadbuster"], "37.789563,-122.400356", "1980/04/10",
         "2014/07/10", [7.9248, 4000.0], date(2014, 6, 24), datetime(2014, 6, 24), True, bytearray("Security", "utf-8"),
         None),
        ("Jazz", 13, "First Lieutenant", 8, 5000000, 1.80, ["Meister"], "33.670666,-117.841553", "1980/04/10",
         "2013/06/10", [3.9624, 1800.0], date(2013, 6, 24), datetime(2014, 6, 24), True,
         bytearray("First Lieutenant", "utf-8"), None),
        ("Megatron", None, "None", 10, 5000000, 5.70, ["Megatron"], None, "1980/04/10", "2012/05/10", [None, 5700.0],
         date(2012, 5, 10), datetime(2014, 6, 24), True, bytearray("None", "utf-8"), None),
        ("Metroplex_)^$", 300, "Battle Station", 8, 5000000, None, ["Metroflex"], None, "1980/04/10", "2011/04/10",
         [91.44, None], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray("Battle Station", "utf-8"), None),
    ]).h_repartition(1)
df.table()
Viewing 6 of 6 rows / 16 columns
1 partition(s)
(all columns nullable)
names (string) | height(ft) (int) | function (string) | rank (int) | age (int) | weight(t) (float) | japanese name (array<string>) | last position seen (string) | date arrival (string) | last date seen (string) | attributes (array<float>) | DateType (date) | Tiemstamp (timestamp) | Cybertronian (boolean) | function(binary) (binary) | NullType (null)
Optim'us | 28 | Leader | 10 | 5000000 | 4.300000190734863 | ['Inochi', 'Convoy'] | 19.442735,-99.201111 | 1980/04/10 | 2016/09/10 | [8.53439998626709, 4300.0] | 2016-09-10 | 2014-06-24 00:00:00 | True | None | None
bumbl#ebéé  | 17 | Espionage | 7 | 5000000 | 2.0 | ['Bumble', 'Goldback'] | 10.642707,-71.612534 | 1980/04/10 | 2015/08/10 | [5.334000110626221, 2000.0] | 2015-08-10 | 2014-06-24 00:00:00 | True | None | None
ironhide& | 26 | Security | 7 | 5000000 | 4.0 | ['Roadbuster'] | 37.789563,-122.400356 | 1980/04/10 | 2014/07/10 | [7.924799919128418, 4000.0] | 2014-06-24 | 2014-06-24 00:00:00 | True | None | None
Jazz | 13 | First Lieutenant | 8 | 5000000 | 1.7999999523162842 | ['Meister'] | 33.670666,-117.841553 | 1980/04/10 | 2013/06/10 | [3.962399959564209, 1800.0] | 2013-06-24 | 2014-06-24 00:00:00 | True | None | None
Megatron | None | None | 10 | 5000000 | 5.699999809265137 | ['Megatron'] | None | 1980/04/10 | 2012/05/10 | [None, 5700.0] | 2012-05-10 | 2014-06-24 00:00:00 | True | None | None
Metroplex_)^$ | 300 | Battle Station | 8 | 5000000 | None | ['Metroflex'] | None | 1980/04/10 | 2011/04/10 | [91.44000244140625, None] | 2011-04-10 | 2014-06-24 00:00:00 | True | None | None
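The pyspark.sql.types imports above are handy if you prefer to build a DataFrame with an explicit schema through plain PySpark instead of letting Optimus infer the types. A minimal sketch using only a few of the 16 columns from the example data:
In [ ]:
# Sketch: explicit-schema DataFrame with plain PySpark (subset of the columns above).
schema = StructType([
    StructField("names", StringType(), True),
    StructField("height(ft)", IntegerType(), True),
    StructField("function", StringType(), True),
    StructField("Cybertronian", BooleanType(), True),
])
rows = [("Optim'us", 28, "Leader", True),
        ("bumbl#ebéé ", 17, "Espionage", True)]
op.spark.createDataFrame(rows, schema).show()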
In [ ]:
import pandas as pd
pdf = pd.DataFrame({'A': {0: 'a', 1: 'b', 2: 'c'},
                    'B': {0: 1, 1: 3, 2: 5},
                    'C': {0: 2, 1: 4, 2: 6}})
sdf = op.create.df(pdf=pdf)
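This converts the pandas DataFrame into a Spark DataFrame. The plain-PySpark equivalent is a direct createDataFrame call on the wrapped session (a minimal sketch):
In [ ]:
# Equivalent pandas-to-Spark conversion with the plain SparkSession API.
sdf2 = op.spark.createDataFrame(pdf)
sdf2.show()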
In [ ]:
op.append([df,df], like="rows").table()
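append with like="rows" stacks the two DataFrames vertically. The plain-PySpark counterpart is a union, which assumes both DataFrames share the same schema (a minimal sketch):
In [ ]:
# Plain-PySpark row append: union requires matching schemas.
df.union(df).show()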
In [20]:
df_csv =op.load.csv("data/foo.csv").table(5)
Viewing 5 of 19 rows / 8 columns
1 partition(s)
(all columns nullable)
id (int) | firstName (string) | lastName (string) | billingId (int) | product (string) | price (int) | birth (string) | dummyCol (string)
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up
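op.load.csv infers the header and column types here (note the int columns above). If you need finer control over parsing, you can fall back to the plain Spark CSV reader; the options below are standard Spark options, shown only as a sketch:
In [ ]:
# Plain Spark CSV reader with explicit options, for comparison.
df_csv2 = op.spark.read.csv("data/foo.csv", header=True, inferSchema=True, sep=",")
df_csv2.show(5)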
In [14]:
df_tsv = op.load.tsv("data/foo.tsv")
df_tsv.table(5)
Viewing 4 of 4 rows / 5 columns
1 partition(s)
(all columns nullable)
Sepal length (string) | Sepal width (double) | Petal length (double) | Petal width (string) | Species (string)
5.0 | 3.6 | 1.4 | 0.2 | I. setosa
5.0 | 3.6 | 1.4 | 0.2 | I. setosa
5.0 | 3.6 | 1.4 | 0.2 | I. setosa
5.0 | 3.6 | 1.4 | 0.2 | I. setosa
In [21]:
df_json =op.load.json("data/foo.json").table(5)
Viewing 5 of 19 rows / 8 columns
1 partition(s)
(all columns nullable)
billingId (bigint) | birth (string) | dummyCol (string) | firstName (string) | id (bigint) | lastName (string) | price (bigint) | product (string)
123 | 1980/07/07 | never | Luis | 1 | Alvarez$$%! | 10 | Cake
423 | 1950/07/08 | gonna | André | 2 | Ampère | 8 | piza
551 | 1990/07/09 | give | NiELS | 3 | Böhr//((%% | 8 | pizza
521 | 1954/07/10 | you | PAUL | 4 | dirac$ | 8 | pizza
634 | 1990/07/11 | up | Albert | 5 | Einstein | 8 | pizza
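Note that Spark's JSON reader infers the schema and returns the columns in alphabetical order (billingId first), not in the order they appear in the file. If you want a specific order, select it explicitly (a minimal sketch):
In [ ]:
# Restore a specific column order after loading JSON.
df_json.select("id", "firstName", "lastName", "billingId",
               "product", "price", "birth", "dummyCol").show(5)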
In [22]:
df_parquet =op.load.parquet("data/foo.parquet").table(5)
Viewing 5 of 19 rows / 8 columns
1 partition(s)
(all columns nullable)
id (int) | firstName (string) | lastName (string) | billingId (int) | product (string) | price (int) | birth (string) | dummyCol (string)
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up
df_avro =op.load.avro("data/foo.avro").table(5)
In [15]:
df_excel = op.load.excel("data/titanic3.xls")
df_excel.table(5)
Viewing 5 of 1309 rows / 14 columns
8 partition(s)
(all columns nullable)
pclass (bigint) | survived (bigint) | name (string) | sex (string) | age (double) | sibsp (bigint) | parch (bigint) | ticket (string) | fare (double) | cabin (string) | embarked (string) | boat (string) | body (double) | home_dest (string)
1 | 1 | Allen, Miss. Elisabeth Walton | female | 29.0 | 0 | 0 | 24160 | 211.3375 | B5 | S | 2 | nan | St Louis, MO
1 | 1 | Allison, Master. Hudson Trevor | male | 0.9167 | 1 | 2 | 113781 | 151.55 | C22 C26 | S | 11 | nan | Montreal, PQ / Chesterville, ON
1 | 0 | Allison, Miss. Helen Loraine | female | 2.0 | 1 | 2 | 113781 | 151.55 | C22 C26 | S | nan | nan | Montreal, PQ / Chesterville, ON
1 | 0 | Allison, Mr. Hudson Joshua Creighton | male | 30.0 | 1 | 2 | 113781 | 151.55 | C22 C26 | S | nan | 135.0 | Montreal, PQ / Chesterville, ON
1 | 0 | Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | female | 25.0 | 1 | 2 | 113781 | 151.55 | C22 C26 | S | nan | nan | Montreal, PQ / Chesterville, ON
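Excel is not a native Spark format, so a simple alternative is to read the sheet with pandas and convert it, reusing the pandas-to-Spark path shown earlier. A minimal sketch; it assumes pandas can read the .xls file (which typically requires the xlrd package):
In [ ]:
# Read the spreadsheet with pandas, then convert to a Spark DataFrame.
import pandas as pd
pdf_titanic = pd.read_excel("data/titanic3.xls")
df_titanic = op.create.df(pdf=pdf_titanic)
df_titanic.table(5)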
In [16]:
df_csv =op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv")
df_csv.table()
Viewing 19 of 19 rows / 8 columns
1 partition(s)
(all columns nullable)
id (int) | firstName (string) | lastName (string) | billingId (int) | product (string) | price (int) | birth (string) | dummyCol (string)
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up
6 | Galileo |              GALiLEI | 672 | arepa | 5 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323 | taco | 3 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624 | taaaccoo | 3 | 1950/07/14 | let
9 | Johannes | KEPLER | 735 | taco | 3 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875 | taco | 3 | 1923/03/12 | down
11 | Isaac | Newton | 992 | pasta | 9 | 1999/02/15 | never
12 | Emmy%% | Nöether$ | 234 | pasta | 9 | 1993/12/08 | gonna
13 | Max!!! | Planck!!! | 111 | hamburguer | 4 | 1994/01/04 | run
14 | Fred | Hoy&&&le | 553 | pizzza | 8 | 1997/06/27 | around
15 | (((   Heinrich ))))) | Hertz | 116 | pizza | 8 | 1956/11/30 | and
16 | William | Gilbert### | 886 | BEER | 2 | 1958/03/26 | desert
17 | Marie | CURIE | 912 | Rice | 1 | 2000/03/22 | you
18 | Arthur | COM%%%pton | 812 | 110790 | 5 | 1899/01/01 | #
19 | JAMES | Chadwick | 467 | null | 10 | 1921/05/03 | #
In [17]:
df_json =op.load.json("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.json")
df_json.table()
Viewing 19 of 19 rows / 8 columns
1 partition(s)
(all columns nullable)
billingId (bigint) | birth (string) | dummyCol (string) | firstName (string) | id (bigint) | lastName (string) | price (bigint) | product (string)
123 | 1980/07/07 | never | Luis | 1 | Alvarez$$%! | 10 | Cake
423 | 1950/07/08 | gonna | André | 2 | Ampère | 8 | piza
551 | 1990/07/09 | give | NiELS | 3 | Böhr//((%% | 8 | pizza
521 | 1954/07/10 | you | PAUL | 4 | dirac$ | 8 | pizza
634 | 1990/07/11 | up | Albert | 5 | Einstein | 8 | pizza
672 | 1930/08/12 | never | Galileo | 6 |              GALiLEI | 5 | arepa
323 | 1970/07/13 | gonna | CaRL | 7 | Ga%%%uss | 3 | taco
624 | 1950/07/14 | let | David | 8 | H$$$ilbert | 3 | taaaccoo
735 | 1920/04/22 | you | Johannes | 9 | KEPLER | 3 | taco
875 | 1923/03/12 | down | JaMES | 10 | M$$ax%%well | 3 | taco
992 | 1999/02/15 | never | Isaac | 11 | Newton | 9 | pasta
234 | 1993/12/08 | gonna | Emmy%% | 12 | Nöether$ | 9 | pasta
111 | 1994/01/04 | run | Max!!! | 13 | Planck!!! | 4 | hamburguer
553 | 1997/06/27 | around | Fred | 14 | Hoy&&&le | 8 | pizzza
116 | 1956/11/30 | and | (((   Heinrich ))))) | 15 | Hertz | 8 | pizza
886 | 1958/03/26 | desert | William | 16 | Gilbert### | 2 | BEER
912 | 2000/03/22 | you | Marie | 17 | CURIE | 1 | Rice
812 | 1899/01/01 | # | Arthur | 18 | COM%%%pton | 5 | 110790
467 | 1921/05/03 | # | JAMES | 19 | Chadwick | 10 | null
In [25]:
df_parquet =op.load.parquet("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.parquet")
df_parquet.table()
C:\Users\ARGENI~1\AppData\Local\Temp\tmpvhxr0yyy.parquet
Viewing 19 of 19 rows / 8 columns
1 partition(s)
(all columns nullable)
id (int) | firstName (string) | lastName (string) | billingId (int) | product (string) | price (int) | birth (string) | dummyCol (string)
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up
6 | Galileo |              GALiLEI | 672 | arepa | 5 | 1930/08/12 | never
7 | CaRL | Ga%%%uss | 323 | taco | 3 | 1970/07/13 | gonna
8 | David | H$$$ilbert | 624 | taaaccoo | 3 | 1950/07/14 | let
9 | Johannes | KEPLER | 735 | taco | 3 | 1920/04/22 | you
10 | JaMES | M$$ax%%well | 875 | taco | 3 | 1923/03/12 | down
11 | Isaac | Newton | 992 | pasta | 9 | 1999/02/15 | never
12 | Emmy%% | Nöether$ | 234 | pasta | 9 | 1993/12/08 | gonna
13 | Max!!! | Planck!!! | 111 | hamburguer | 4 | 1994/01/04 | run
14 | Fred | Hoy&&&le | 553 | pizzza | 8 | 1997/06/27 | around
15 | (((   Heinrich ))))) | Hertz | 116 | pizza | 8 | 1956/11/30 | and
16 | William | Gilbert### | 886 | BEER | 2 | 1958/03/26 | desert
17 | Marie | CURIE | 912 | Rice | 1 | 2000/03/22 | you
18 | Arthur | COM%%%pton | 812 | 110790 | 5 | 1899/01/01 | #
19 | JAMES | Chadwick | 467 | null | 10 | 1921/05/03 | #
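The temporary path printed above suggests that remote files are first downloaded to a local file before Spark reads them. If you prefer to control that step yourself, you can download explicitly and load the local copy; a minimal sketch using the standard library (the local filename is just an example):
In [ ]:
# Download a remote file explicitly, then load the local copy.
import urllib.request
local_path, _ = urllib.request.urlretrieve(
    "https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv",
    "foo_downloaded.csv")
op.load.csv(local_path).table(5)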
df_avro =op.load.avro("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.avro", "avro") df_avro.table()
In [ ]:
df_csv.save.csv("test.csv")
In [ ]:
df_csv.save.json("test.json")
In [ ]:
df_csv.save.parquet("test.parquet")
df_csv.save.avro("test.avro")