In [1]:
#SKIP_COMPARE_OUTPUT
import pixiedust
In [2]:
# Column labels, in the same positional order as the fields of each row below.
columns = ["id", "name", "age", "weight", "something"]

# Fixture rows: the same four (id, name, age) people repeat in five
# consecutive runs, each run carrying different `weight` / `something` values.
data = [
    # id   name       age  weight  something
    ("a", "Alice",    34,   60,     1),
    ("b", "Bob",      36,   70,     2),
    ("c", "Charlie",  30,   99,     6),
    ("d", "Hector",   75,   87,     9),

    ("a", "Alice",    34,   44,    12),
    ("b", "Bob",      36,  109,    33),
    ("c", "Charlie",  30,   56,    77),
    ("d", "Hector",   75,   99,     2),

    ("a", "Alice",    34,   68,     7),
    ("b", "Bob",      36,  156,    54),
    ("c", "Charlie",  30,   89,    23),
    ("d", "Hector",   75,   64,    88),

    ("a", "Alice",    34,   45,    44),
    ("b", "Bob",      36,   97,    33),
    ("c", "Charlie",  30,   85,    41),
    ("d", "Hector",   75,   79,     6),

    ("a", "Alice",    34,   63,     9),
    ("b", "Bob",      36,  100,    28),
    ("c", "Charlie",  30,   85,    12),
    ("d", "Hector",   75,   73,     9),
]
In [4]:
#TARGET=SPARK
# Spark variant of the fixture: build `dd` from the in-memory rows.
# NOTE(review): `SQLContext` and `sc` are not defined anywhere in this
# notebook -- presumably injected by the Spark kernel's startup; confirm.
sqlContext = SQLContext(sc)
dd = sqlContext.createDataFrame(data, schema=columns)

# Trailing expression, so the row count becomes this cell's output.
dd.count()
Out[4]:
In [5]:
#TARGET=PLAIN
# Plain-Python variant of the fixture: build `dd` as a pandas DataFrame
# from the same rows and column labels.
import pandas

dd = pandas.DataFrame(data=data, columns=columns)
In [6]:
# PixieDust display of `dd`: options select the 'histogram' handler with the
# 'matplotlib' renderer, valueFields='weight', rowCount='100'.
# NOTE(review): the keyword string looks tool-generated (cell_id / prefix /
# nostore_* entries) -- keep the call on a single line and avoid hand-editing.
# NOTE(review): this cell_id is identical to the one in the other display
# cells of this notebook -- confirm that sharing it is intentional.
display(dd,cell_id='0B4A2B85FB23459DAEE455A3F08458CD',handlerId='histogram',rendererId='matplotlib',no_gen_tests='true',valueFields='weight',rowCount='100',nostore_cw='1098',nostore_pixiedust='true',nostore_bokeh='false',prefix='c95db292')
In [7]:
#SKIP_COMPARE_OUTPUT
# PixieDust display of `dd`: options select the 'lineChart' handler with the
# 'matplotlib' renderer, keyFields='age', valueFields='weight',
# aggregation='SUM', rowCount='100'.
# NOTE(review): the keyword string looks tool-generated (cell_id / prefix /
# nostore_* entries) -- keep the call on a single line and avoid hand-editing.
# NOTE(review): this cell_id is identical to the one in the other display
# cells of this notebook -- confirm that sharing it is intentional.
display(dd,cell_id='0B4A2B85FB23459DAEE455A3F08458CD',handlerId='lineChart',rendererId='matplotlib',no_gen_tests='true',valueFields='weight',rowCount='100',keyFields='age',aggregation='SUM',nostore_cw='1098',nostore_pixiedust='true',nostore_bokeh='false',prefix='a8e8d27d')
In [8]:
# PixieDust display of `dd`: options select the 'scatterPlot' handler with the
# 'matplotlib' renderer, keyFields='age', valueFields='weight', rowCount='100'.
# NOTE(review): aggregation='SUM' is also passed here, same as the lineChart
# cell -- unclear from this notebook whether the scatter plot uses it; confirm.
# NOTE(review): this cell_id is identical to the one in the other display
# cells of this notebook -- confirm that sharing it is intentional.
display(dd,cell_id='0B4A2B85FB23459DAEE455A3F08458CD',handlerId='scatterPlot',rendererId='matplotlib',no_gen_tests='true',valueFields='weight',rowCount='100',keyFields='age',aggregation='SUM',nostore_cw='1098',nostore_pixiedust='true',nostore_bokeh='false',prefix='4a06eca2')
In [ ]: