In [3]:
%AddJar file:///home/jovyan/work/data/postgresql-42.2.2.jar -f

    
    #%AddJar file:///home/jovyan/work/data/greenplum-spark_2.11-1.4.0-alpha.jar -f


UsageError: Line magic function `%AddJar` not found.

In [4]:
import org.apache.spark.sql.Row
import org.apache.spark.sql._
import org.apache.spark.sql.types.DataType._
import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.types._

val data = Seq(
  Row(1, "a"),
  Row(5, "z")
)

val schema = StructType(
  List(
    StructField("num", IntegerType, true),
    StructField("letter", StringType, true)
  )
)

val df = spark.createDataFrame(
  spark.sparkContext.parallelize(data),
  schema
)

df.show()


  File "<ipython-input-4-cfa2977469de>", line 4
    import org.apache.spark.sql.types.{StructField, StructType}
                                      ^
SyntaxError: invalid syntax

In [5]:
sc.version



NameErrorTraceback (most recent call last)
<ipython-input-5-0b52e24bf5ff> in <module>()
----> 1 sc.version

NameError: name 'sc' is not defined

In [5]:
Class.forName("org.postgresql.Driver")



NameErrorTraceback (most recent call last)
<ipython-input-5-55404b05eb67> in <module>()
----> 1 Class.forName("org.postgresql.Driver")
      2 

NameError: name 'Class' is not defined

In [6]:
val dataFrame = spark.read.format("io.pivotal.greenplum.spark.GreenplumRelationProvider")
.option("dbtable", "usertable")
.option("url", "jdbc:postgresql://gpdbsne/basic_db")
.option("user", "gpadmin")
.option("password", "pivotal")
.option("partitionColumn", "id")
.load()


  File "<ipython-input-6-0ff62d4ac697>", line 1
    val dataFrame = spark.read.format("io.pivotal.greenplum.spark.GreenplumRelationProvider")
                ^
SyntaxError: invalid syntax

In [7]:
val opts = Map("url" -> "jdbc:postgresql://gpdbsne/basic_db?user=gpadmin&password=pivotal","dbtable" -> "usertable")
val df = spark.read.format("jdbc").options(opts).load()


  File "<ipython-input-7-1bbc6f32e55a>", line 1
    val opts = Map("url" -> "jdbc:postgresql://gpdbsne/basic_db?user=gpadmin&password=pivotal","dbtable" -> "usertable")
           ^
SyntaxError: invalid syntax

In [23]:
val jdbcDF = spark.read
  .format("jdbc")
  .option("url", "jdbc:postgresql://gpdbsne:5432/basic_db")
  .option("dbtable", "usertable")
  .option("user", "gpadmin")
  .option("password", "pivotal")
  .option("partitionColumn", "id")
  .option("lowerBound", "0")
  .option("upperBound", "1000")
  .option("numPartitions", "100")
  .load()


lastException = null
Out[23]:
Name: java.lang.InstantiationException
Message: org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper
StackTrace:   at org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry$.register(DriverRegistry.scala:53)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:55)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:54)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:56)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation.<init>(JDBCRelation.scala:115)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:52)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
  at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
  ... 50 elided
Caused by: java.lang.NoSuchMethodException: org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper.<init>()
  at java.lang.Class.getConstructor0(Class.java:3082)
  at java.lang.Class.newInstance(Class.java:412)

In [ ]: