In [ ]:
%AddJar file:///home/jovyan/work/data/postgresql-42.2.2.jar -f
%AddJar file:///home/jovyan/work/data/greenplum-spark_2.11-1.4.0-alpha.jar -f
In [32]:
import org.apache.spark.sql.Row
import org.apache.spark.sql._
import org.apache.spark.sql.types.DataType._
import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.types._
// Smoke test: build a tiny two-column DataFrame in memory and display it,
// verifying the Spark session works before touching any external database.
val rows = Seq(
  Row(1, "a"),
  Row(5, "z")
)
// Schema: nullable Int column "num" and nullable String column "letter".
val schema = StructType(
  StructField("num", IntegerType, nullable = true) ::
    StructField("letter", StringType, nullable = true) ::
    Nil
)
val df = spark.createDataFrame(spark.sparkContext.parallelize(rows), schema)
df.show()
Out[32]:
In [ ]:
// Report the running Spark version from the active SparkContext (sanity check).
sc.version
In [ ]:
// Eagerly load the PostgreSQL JDBC driver class so it registers itself with
// java.sql.DriverManager; throws ClassNotFoundException if the jar is missing.
Class.forName("org.postgresql.Driver")
In [33]:
// Read "usertable" through Pivotal's Greenplum-Spark connector, partitioned
// on column "id" so segments are read in parallel.
// NOTE(review): URL omits the port — presumably the default 5432; confirm.
val greenplumOptions = Map(
  "dbtable"         -> "usertable",
  "url"             -> "jdbc:postgresql://gpdbsne/basic_db",
  "user"            -> "gpadmin",
  "password"        -> "pivotal",
  "partitionColumn" -> "id"
)
val dataFrame = spark.read
  .format("io.pivotal.greenplum.spark.GreenplumRelationProvider")
  .options(greenplumOptions)
  .load()
Out[33]:
In [ ]:
// Plain (single-partition) Spark JDBC read of the same table.
// NOTE(review): credentials are embedded in the connection URL here, unlike the
// other cells which pass them as separate options — consider unifying.
val jdbcOptions = Map(
  "url"     -> "jdbc:postgresql://gpdbsne/basic_db?user=gpadmin&password=pivotal",
  "dbtable" -> "usertable"
)
val df = spark.read.format("jdbc").options(jdbcOptions).load()
In [23]:
// Partitioned JDBC read: Spark issues 100 parallel queries, range-splitting
// rows of "usertable" on column "id" between lowerBound 0 and upperBound 1000.
val partitionedReadOptions = Map(
  "url"             -> "jdbc:postgresql://gpdbsne:5432/basic_db",
  "dbtable"         -> "usertable",
  "user"            -> "gpadmin",
  "password"        -> "pivotal",
  "partitionColumn" -> "id",
  "lowerBound"      -> "0",
  "upperBound"      -> "1000",
  "numPartitions"   -> "100"
)
val jdbcDF = spark.read.format("jdbc").options(partitionedReadOptions).load()
Out[23]:
In [ ]: