In [1]:
%AddJar file:///home/jovyan/work/data/postgresql-42.2.2.jar -f
%AddJar file:///home/jovyan/work/data/greenplum-spark_2.11-1.4.0-alpha.jar -f


UsageError: Line magic function `%AddJar` not found.

In [32]:
import org.apache.spark.sql.Row
import org.apache.spark.sql._
import org.apache.spark.sql.types.DataType._
import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.types._

val data = Seq(
  Row(1, "a"),
  Row(5, "z")
)

val schema = StructType(
  List(
    StructField("num", IntegerType, true),
    StructField("letter", StringType, true)
  )
)

val df = spark.createDataFrame(
  spark.sparkContext.parallelize(data),
  schema
)

df.show()


+---+------+
|num|letter|
+---+------+
|  1|     a|
|  5|     z|
+---+------+

data = List([1,a], [5,z])
schema = StructType(StructField(num,IntegerType,true), StructField(letter,StringType,true))
df = [num: int, letter: string]
lastException: Throwable = null
Out[32]:
[num: int, letter: string]

In [ ]:
sc.version

In [ ]:
Class.forName("org.postgresql.Driver")

In [33]:
val dataFrame = spark.read.format("io.pivotal.greenplum.spark.GreenplumRelationProvider")
.option("dbtable", "usertable")
.option("url", "jdbc:postgresql://gpdbsne/basic_db")
.option("user", "gpadmin")
.option("password", "pivotal")
.option("partitionColumn", "id")
.load()


Out[33]:
Name: java.lang.NoSuchMethodError
Message: org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.getSchema(Ljava/sql/ResultSet;Lorg/apache/spark/sql/jdbc/JdbcDialect;)Lorg/apache/spark/sql/types/StructType;
StackTrace:   at io.pivotal.greenplum.spark.jdbc.Jdbc$.resolveTable(Jdbc.scala:298)
  at io.pivotal.greenplum.spark.GreenplumRelationProvider.createRelation(GreenplumRelationProvider.scala:46)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
  at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)

In [ ]:
val opts = Map("url" -> "jdbc:postgresql://gpdbsne/basic_db?user=gpadmin&password=pivotal","dbtable" -> "usertable")
val df = spark.read.format("jdbc").options(opts).load()

In [23]:
val jdbcDF = spark.read
  .format("jdbc")
  .option("url", "jdbc:postgresql://gpdbsne:5432/basic_db")
  .option("dbtable", "usertable")
  .option("user", "gpadmin")
  .option("password", "pivotal")
  .option("partitionColumn", "id")
  .option("lowerBound", "0")
  .option("upperBound", "1000")
  .option("numPartitions", "100")
  .load()


lastException = null
Out[23]:
Name: java.lang.InstantiationException
Message: org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper
StackTrace:   at org.apache.spark.sql.execution.datasources.jdbc.DriverRegistry$.register(DriverRegistry.scala:53)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:55)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:54)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:56)
  at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation.<init>(JDBCRelation.scala:115)
  at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:52)
  at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340)
  at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227)
  at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164)
  ... 50 elided
Caused by: java.lang.NoSuchMethodException: org.apache.spark.sql.execution.datasources.jdbc.DriverWrapper.<init>()
  at java.lang.Class.getConstructor0(Class.java:3082)
  at java.lang.Class.newInstance(Class.java:412)

In [ ]: