In [1]:
# Shiman Ding, shiman@berkeley.edu
# IEOR, 24104985
# 09/10/2016, AY250-HW2
# Data Visualization

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

0) Critique the most important figure from a seminal paper in your field. Provide the original figure/caption. In your own words, what story is this figure trying to convey? What does it do well? What could have been done better? What elements didn't need to be present to still convey the same story?


The figure tries to show how a minimum cut can be obtained via clustering. The example is simple yet insightful. However, G1 and G2 are not clearly defined in the graph. The authors could have used dashed lines, or different colors, to distinguish the cuts from the clusters. Including the flow in the graph would also have made the figure better.

1) Reproduce one graph



In [32]:
# Read the tab-separated results table and preview its first rows.
data = pd.read_csv("hw_2_data/ay250.txt", sep="\t")
data.head()


Out[32]:
maxratiode maxratio_cen_zio maxratio_cen_pot maxratiode2 averatiode averatio_cen_zio averatio_cen_pot averatiode2
1 1.004589 1.001530 1.434960 1.001530 1.003185 1.001062 1.204998 1.001062
2 1.016423 1.005474 1.551952 1.005474 1.012674 1.004225 1.227594 1.004225
3 1.034050 1.011350 1.605934 1.011350 1.024761 1.008254 1.524485 1.008254
4 1.063651 1.021217 1.391803 1.021217 1.042438 1.014146 1.208293 1.014146
5 1.075854 1.025285 1.303562 1.025285 1.057829 1.019276 1.259208 1.019276

In [55]:
# (rows, columns) of the table loaded above.
data.shape


Out[55]:
(50, 8)

In [63]:
# Plot every column of `data` as one line. Colours cycle through `colors`;
# the line style advances each time the colour list has been exhausted, so
# the first len(colors) columns are dashed and the next len(colors) are solid.
fig = plt.figure()
ax = fig.add_subplot(111)
colors = ["red", "green", "blue", "black"]
linestyles = ["--", "-"]

# x positions: row number divided by 5. Derived from the table length instead
# of a hard-coded 50 so the cell keeps working if the input file changes, and
# computed once rather than on every loop iteration.
x_values = np.arange(len(data)) / 5.0

for i, col in enumerate(data.columns):
    ax.plot(
        x_values,
        data[col],
        label=col,
        color=colors[i % len(colors)],
        # Wrap around so extra columns cannot index past the style list.
        linestyle=linestyles[(i // len(colors)) % len(linestyles)],
    )

ax.set_xlabel("Truck Capacity")
ax.set_ylabel("Ratio to Centralized Optimal Solution")
ax.set_title("Cost of Decentralization under different policies")
# Trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();


Out[63]:
<matplotlib.legend.Legend at 0x129ad9630>

2) Reproduce in matplotlib the provided plot stocks.png. Use the provided data files ny_temps.txt, yahoo_data.txt, and google_data.txt. Provide your new plot and the Python code.


In [3]:
# Load the three series. skiprows=1 drops the first row of each file
# (presumably a header line); column 0 is used as x and column 1 as y below.
ny_temps = np.loadtxt("hw_2_data/ny_temps.txt", skiprows=1)
yahoo_data = np.loadtxt("hw_2_data/yahoo_data.txt", skiprows=1)
google_data = np.loadtxt("hw_2_data/google_data.txt", skiprows=1)
fig = plt.figure()
ax1 = fig.add_subplot(111)

# Both stock series share the primary (left) y-axis.
yahoo = ax1.plot(yahoo_data[:,0], yahoo_data[:,1], color = 'purple',
         linestyle = '-', label = "Yahoo! Stock Value")
google = ax1.plot(google_data[:,0], google_data[:,1], color = 'blue',
         linestyle = '-', label = "Google Stock Value")

# Temperatures go on a secondary (right) y-axis that shares the x-axis.
ax2 = ax1.twinx()
temps = ax2.plot(ny_temps[:,0], ny_temps[:,1], color = 'red',
         linestyle = ':', label = "NY Mon. High Temp")

# One legend covering the lines of both axes. The list is named `lines`
# rather than `data` so it does not overwrite the DataFrame `data` that the
# Problem 1 cells depend on.
lines = yahoo + google + temps
labs = [line.get_label() for line in lines]
ax1.legend(lines, labs, loc=(0.03,.45), prop={'size':7}, frameon=False)

ax1.set_title("New York Temperature, Google, and Yahoo!",
              size = 16, family='Times New Roman', fontweight="bold")
ax1.set_xlabel("Date (MJD)")
ax1.set_ylabel("Value (Dollars)")
# Raw string: "\c" is not a valid Python escape sequence, so the backslash
# must reach matplotlib's mathtext parser untouched.
ax2.set_ylabel(r"Temperature ($^\circ$F)")
ax1.set_xlim(48800, 55620)
ax1.set_ylim(-20, 780)
ax1.minorticks_on()
ax2.set_ylim(-150,100)
ax2.minorticks_on()


3) Make a generic "Brushing" graph


In [1]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.patches import Rectangle

import numpy as np
import plotly
import pandas as pd
import datashader
import seaborn as sns
import sys
import os

from bokeh.io import output_file, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure

%matplotlib notebook

In [2]:
# Load the comma-separated flower table and index its rows by species name.
flowers_table = pd.read_csv("hw_2_data/flowers.csv", sep=",")
flowers = flowers_table.set_index("species")

In [19]:
class DrawClass:
    """Scatter-plot matrix with rectangular "brushing".

    An NxN grid of scatter plots is drawn, one per ordered pair of columns
    of ``data``. Dragging a rectangle inside any panel keeps the original
    colour of the rows that fall strictly inside the rectangle and recolours
    every other row with ``INACTIVE_COLOR`` in all panels. A drag whose
    rectangle area is below ``MIN_BRUSH_AREA`` clears the selection.
    """

    # Rectangles with a smaller area (in the data units of the brushed panel)
    # are treated as a plain click that restores the original colours.
    MIN_BRUSH_AREA = 0.01
    # RGBA assigned to rows outside the brushed rectangle.
    INACTIVE_COLOR = (0, 0, 0, 0.1)

    def __init__(self, data, colors):
        """ Initialize the brusher plots

        data: pd.DataFrame - figure will have NxN subplots where N
                                is the number of features/columns
        colors: np.ndarray - one colour per row of ``data``; rows keep this
                                colour while they are selected
        """
        self.data = data
        self.colors = colors
        self.size = data.shape[1]
        # squeeze=False keeps self.axes two-dimensional even for a single
        # column, so the [row, col] indexing below always works.
        self.fig, self.axes = plt.subplots(self.size, self.size, squeeze=False)
        self.ax_data = {}   # (x_ix, y_ix) -> scatter artist of that panel
        self.ax_dict = {}   # axes object  -> (x column, y column)
        # Boolean mask with one entry per row; everything starts selected.
        self.active = pd.Series(True, index=data.index)

        self.rect = None    # rectangle patch currently drawn, if any
        self.ax0 = None     # axes in which the current drag started
        self.current_axes = None  # not used by this class; attribute kept as-is
        self.xy0 = None     # data coordinates of the button press
        self.xy1 = None     # data coordinates of the button release

        for x_ix, x_col in enumerate(self.data.columns):
            for y_ix, y_col in enumerate(self.data.columns):
                panel = self.axes[y_ix, x_ix]
                scatterplot = panel.scatter(data[x_col], data[y_col], alpha = 0.4)

                panel.set_xlim(self.data[x_col].min(), self.data[x_col].max())
                panel.set_ylim(self.data[y_col].min(), self.data[y_col].max())

                panel.xaxis.set_ticks([])
                panel.yaxis.set_ticks([])

                scatterplot.set_color(colors)

                self.ax_data[x_ix, y_ix] = scatterplot
                # Key by the axes object itself: its string form is derived
                # from the axes position and is not a stable identifier.
                self.ax_dict[panel] = (x_col, y_col)

        self.cids = {}
        self.cids['button_press_event'] = self.fig.canvas.mpl_connect('button_press_event', self.onclick)
        self.cids['button_release_event'] = self.fig.canvas.mpl_connect('button_release_event', self.offclick)

        self.fig.show()
        self.flush()

    def flush(self):
        """Flush stdout and redraw the canvas so pending changes show up now."""
        sys.stdout.flush()
        self.fig.canvas.draw()

    def onclick(self, event):
        """Button-press handler: drop the old rectangle and record the start.

        event: matplotlib mouse event; ignored when it carries no pixel
               position.
        """
        if event.x is None or event.y is None:
            return

        if self.rect is not None:
            self.rect.remove()
            # Forget the patch so a later press cannot remove it a second time
            # (e.g. when the matching release landed outside the panel).
            self.rect = None
        self.ax0 = event.inaxes
        self.xy0 = (event.xdata, event.ydata)
        self.flush()

    def offclick(self, event):
        """Button-release handler: draw the rectangle and apply the brush.

        Ignored when the release is outside any axes, when no press has been
        recorded, or when it lands in a different panel than the press.
        """
        if event.xdata is None or event.ydata is None:
            return
        if self.ax0 is None or event.inaxes is not self.ax0:
            return

        self.xy1 = (event.xdata, event.ydata)

        # Normalise the two corners into lower-left corner + width/height.
        xmin = min(self.xy0[0], self.xy1[0])
        xmax = max(self.xy0[0], self.xy1[0])
        ymin = min(self.xy0[1], self.xy1[1])
        ymax = max(self.xy0[1], self.xy1[1])

        width = xmax - xmin
        height = ymax - ymin
        area = width * height

        self.rect = Rectangle((xmin, ymin), width, height, color = 'k', alpha = 0.1)
        self.ax0.add_patch(self.rect)
        if area < self.MIN_BRUSH_AREA:
            # Effectively a click: select everything and restore the colours.
            self.active = pd.Series(True, index=self.data.index)
            self.update_colors()
        else:
            self.update_state(xmin, xmax, ymin, ymax)
        self.flush()

    def update_state(self, xmin, xmax, ymin, ymax):
        """Select the rows strictly inside the rectangle and recolour.

        The bounds are interpreted in the columns shown on the panel where
        the drag started (``self.ax0``). The resulting boolean Series is
        stored in ``self.active``.
        """
        x_label, y_label = self.ax_dict[self.ax0]
        x_values = self.data[x_label]
        y_values = self.data[y_label]

        inside_x = (x_values > xmin) & (x_values < xmax)
        inside_y = (y_values > ymin) & (y_values < ymax)
        self.active = inside_x & inside_y

        self.update_colors(active = self.active.values)

    def update_colors(self, active = None):
        """Recolour every panel.

        active: optional boolean mask with one entry per row. Rows where it
                is False get ``INACTIVE_COLOR``; with ``None`` all rows get
                their original colour. ``self.colors`` is never modified.
        """
        colors = self.colors.copy()
        if active is not None:
            colors[~np.asarray(active, dtype=bool)] = self.INACTIVE_COLOR

        for scatterplot in self.ax_data.values():
            scatterplot.set_color(colors)

In [22]:
# RGBA colour for each species label found in the index of `flowers`.
color_map = {
    'setosa': (0.6, 0, 0, 0.4),
    'versicolor': (0, 0.6, 0, 0.4),
    'virginica': (0, 0, 0.6, 0.4)
}
# One colour per row, in row order.
colors = np.array([color_map[x] for x in flowers.index])
# Bind the brusher to a name: matplotlib keeps only weak references to
# bound-method callbacks, so the object owning them must stay referenced for
# the mouse handlers to keep working. Assigning also keeps the object repr
# out of the cell output.
brusher = DrawClass(data = flowers, colors = colors)


Out[22]:
<__main__.DrawClass at 0x1260f6940>

I discussed HW with Kevin Li, Ying Cao, Tugce, Jing Dai