You can load data files from Amazon S3 storage into a Spark DataFrame.
Before you begin, collect your Amazon S3 connection information: the access key, the secret key, the bucket name, and the name of the file that you want to load.
Import PixieDust and enable the Spark Job monitor
In [ ]:
    
import pixiedust
pixiedust.enableJobMonitor()
    
Enter your S3 connection information in the next cell. The @hidden_cell comment keeps your credentials out of the notebook when you share it.
In [ ]:
    
# @hidden_cell
# Enter your S3 access key (e.g. 'A....K')
s3_access_key = '...'
# Enter your S3 secret key (e.g. 'S....K')
s3_secret_key = '...'
# Enter your S3 bucket name (e.g. 'my-source-bucket')
s3_bucket = '...'
# Enter your CSV file name (e.g. 'my-data/my-file.csv' if my-file.csv is located in the folder my-data)
s3_file_name = '....csv'
    
In [4]:
    
# no changes are required to this cell
from ingest import Connectors
from pyspark.sql import SQLContext

# sc is the SparkContext that is predefined in the notebook environment
sqlContext = SQLContext(sc)

S3loadoptions = {
                  Connectors.AmazonS3.ACCESS_KEY          : s3_access_key,
                  Connectors.AmazonS3.SECRET_KEY          : s3_secret_key,
                  Connectors.AmazonS3.SOURCE_BUCKET       : s3_bucket,
                  Connectors.AmazonS3.SOURCE_FILE_NAME    : s3_file_name,
                  Connectors.AmazonS3.SOURCE_INFER_SCHEMA : '1',     # infer column types from the data
                  Connectors.AmazonS3.SOURCE_FILE_FORMAT  : 'csv'}

# load the file into a Spark DataFrame
S3_data = sqlContext.read.format('com.ibm.spark.discover').options(**S3loadoptions).load()
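    
Before you visualize the data, you can optionally verify that the file loaded as expected. A minimal sketch using standard Spark DataFrame methods (S3_data is the DataFrame created in the previous cell):

In [ ]:
    
# optional: inspect the loaded DataFrame
S3_data.printSchema()   # column names and inferred types
S3_data.show(5)         # preview the first five rows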
    
In [5]:
    
# explore the data with the interactive PixieDust display widget
display(S3_data)
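    
The PixieDust display() call renders an interactive table and charting widget. If you prefer to continue in pandas instead, you can convert the Spark DataFrame; a minimal sketch (toPandas() collects all rows to the driver, so use it only for data sets that fit in memory):

In [ ]:
    
# optional: convert to a pandas DataFrame for local analysis
pandas_df = S3_data.toPandas()
pandas_df.head()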
    
    