In [3]:
import math
import os

import pandas as pd
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
# Keep this import after the wildcard import above: if the wildcard also
# exports a name called `Image`, IPython's class must be the one that wins.
from IPython.display import Image

# Plotly credentials come from the environment so the API key never lives in
# the notebook. SECURITY: the key that used to be hardcoded on this line has
# been exposed and should be regenerated in the Plotly account settings.
plotly_username = os.environ.get('PLOTLY_USERNAME', 'xjiang36')
plotly_api_key = os.environ.get('PLOTLY_API_KEY')
if plotly_api_key:
    plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key)
else:
    # No key supplied: do not overwrite whatever credentials file may already
    # exist on this machine; just tell the user why an upload might fail.
    print('PLOTLY_API_KEY is not set; using previously saved Plotly credentials, if any.')

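# ASSUMPTIONS:
# 1. The data set's columns are ordered: NAME, DATE, LATITUDE, LONGITUDE, CATEGORY, QUAN1, QUAN2, QUAN3, ..., QUANN.
# 2. SIZE: each map marker is sized by the quartile-like band its QUAN3 value (column index 7) falls into.
# 3. CATEGORY: the 10 most frequent categories each get their own colour; all others are drawn in grey.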

In [127]:
# Load the sample data set. Columns are addressed by position throughout the
# notebook (see the ASSUMPTIONS comment in the first cell), so only the column
# order matters, not the header names.
# (An alternative sample file used previously: 't3sample.csv'.)
df = pd.read_csv('Q3-moreCtgSample.csv', encoding='iso-8859-1')
colnames = df.columns.values

# Frequency table of the category column (index 4), most frequent first.
# It is built explicitly instead of via value_counts().reset_index().rename(),
# because the column names produced by that chain differ between pandas
# versions. The result always has:
#   column 'x'          -> the category value
#   column colnames[4]  -> how many rows carry that category
# and a default 0..n-1 index, so row position equals frequency rank.
category_counts = df[colnames[4]].value_counts()
freq = pd.DataFrame(
    {'x': category_counts.index, colnames[4]: category_counts.values},
    columns=['x', colnames[4]],
)
freqcol = freq.columns.values

# Palette: one distinct colour for each of the 10 most frequent categories,
# plus a final grey entry shared by every other category.
colorbar=["#FF0000","#FF8000","#FFFF00","#00FF00","#00FFFF","#0080FF","#0000FF","#7F00FF","#FF00FF","#FF007F","#C0C0C0"]
other_color_idx = len(colorbar) - 1

# Frequency rank of every category (0 = most frequent); `freq` is already
# sorted by descending count.
category_rank = {category: rank for rank, category in enumerate(freq["x"])}

# One colour per data row, in row order. Rows whose category is missing from
# `freq` (e.g. empty cells, which value_counts() drops) fall back to grey, so
# the list always has exactly one entry per row and stays aligned with the
# latitude/longitude columns.
colorbars = [
    colorbar[min(category_rank.get(category, other_color_idx), other_color_idx)]
    for category in df[colnames[4]]
]

# Marker sizes: every value of the size column (index 7) is placed into one of
# four bands, and each band has a fixed marker size.
size_values = df[colnames[7]]

# Band boundaries. They do not depend on the row being processed, so they are
# computed once here rather than once per row.
size_median = size_values.median()
size_upper_mid = size_values[size_values > size_median].median()  # median of the upper half
size_lower_mid = size_values[size_values < size_median].median()  # median of the lower half


def _marker_size(value):
    """Return the marker size (15, 9, 6 or 3) for one value of the size column.

    Values above the overall median get 15 or 9, depending on whether they
    also exceed the median of the upper half; the remaining values get 6 or 3,
    depending on whether they exceed the median of the lower half. A NaN value
    fails every comparison and therefore gets the smallest size, 3.
    """
    if value > size_median:
        return 15 if value > size_upper_mid else 9
    return 6 if value > size_lower_mid else 3


# One size per data row, in row order.
sizes = [_marker_size(value) for value in size_values]

# Bar chart data: the `topn` most frequent categories plus one aggregated
# "Other" bar holding the combined count of every remaining category.
topn = 10
label_col, count_col = freqcol[0], freqcol[1]

# Work on an independent copy so that `freq` itself is never modified.
# `freq` has a default 0..n-1 index, so label `topn` is the row right after
# the top-`topn` categories.
topfreq = freq.iloc[:topn + 1].copy()
if len(freq) > topn:
    # Turn that extra row into the "Other" bucket. (DataFrame.set_value, used
    # here originally, no longer exists in current pandas; .at is the
    # replacement for scalar assignment.)
    topfreq.at[topn, label_col] = "Other"
    topfreq.at[topn, count_col] = freq[count_col].iloc[topn:].sum()

# Both axes come from `topfreq` so the "Other" bar shows the aggregated count.
# The palette's last (grey) entry lines up with the "Other" bar.
locations = Bar(x=topfreq[label_col], y=topfreq[count_col], marker=dict(color=colorbar))

# read in 3d volcano surface data
#df_v = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/volcano.csv')

# plot(2) 3d surface of volcano
#threed = Surface(z=df_v.values.tolist(), colorscale='Reds', showscale=False)
#logsizes=[]
#for i in range(len(df[colnames[4]])):
#    logsize=math.log10(float(df[colnames[5]][i]))
#    logsize=logsize*1.5
#    logsizes.append(logsize)



# plot(3): scatter-geo map, one marker per data row.
# Marker appearance: size comes from `sizes`, colour from `colorbars`.
# NOTE(review): colours are given explicitly per point, so `colorscale`
# probably has no visible effect - confirm before removing it.
geo_marker = dict(
    size=sizes,
    opacity=0.8,
    color=colorbars,
    colorscale='Viridis',
)

# NOTE(review): hoverinfo is 'text' but no `text` values are supplied -
# confirm that blank hover labels are intended.
trace3 = dict(
    geo="geo3",                # drawn on the `geo3` subplot of the layout
    lon=df[colnames[3]],       # longitude column
    lat=df[colnames[2]],       # latitude column
    hoverinfo='text',
    marker=geo_marker,
    mode="markers",
    type="scattergeo",
)

# Traces of the figure: the bar chart first, then the map.
data = Data([locations, trace3])

# Figure layout. Each subplot is positioned through its `domain`, given as
# fractions of the figure: the globe (`geo3`) on the left, the bar chart
# (`xaxis`/`yaxis`) on the right.
layout = {
  # Dark theme: black backgrounds with white text.
  "plot_bgcolor": 'black',
  "paper_bgcolor": 'black',
  "titlefont": {
      "size": 20,
      "family": "Raleway"
  },
  "font": {
      "color": 'white'
  },
  "dragmode": "zoom",
  # Globe that hosts the scattergeo trace.
  "geo3": {
    "domain": {
      "x": [0, 0.55],
      "y": [0.18, 0.9]
    },
    "lakecolor": "rgba(127,205,255,1)",
    "oceancolor": "rgb(6,66,115)",
    "landcolor": 'white',
    "projection": {"type": "orthographic"},
    "scope": "world",
    "showlakes": True,
    "showocean": True,
    "showland": True,
    "bgcolor": 'black'
  },
  "margin": {
    "r": 10,
    "t": 25,
    "b": 40,
    "l": 60
  },
  # 3-D scene settings. No 3-D trace is added to the figure in this cell (the
  # surface plot is commented out), so this entry is currently unused.
  "scene": {"domain": {
      "x": [0.5, 1],
      "y": [0, 0.55]
    },
           "xaxis": {"gridcolor": 'white'},
           "yaxis": {"gridcolor": 'white'},
           "zaxis": {"gridcolor": 'white'}
           },
  "showlegend": False,
  # The "top N" in the title refers to the category column (index 4), which is
  # the column that is counted and colour-coded, so that is the name shown.
  "title": "<br>Distribution of \"%s\" (top%s)" % (colnames[4],topn),
  # Axes of the bar chart.
  "xaxis": {
    "anchor": "y",
    "domain": [0.6, 0.95]
  },
  "yaxis": {
    "anchor": "x",
    "domain": [0.1, 0.9],
    "showgrid": False
  }
}

# Caption placed above the bar chart (paper coordinates, i.e. fractions of the
# whole figure). The bars count the values of the category column (index 4),
# so that column's name is the one shown in the caption.
annotations = { "text": "Histogram of \"%s\" (top%s)" % (colnames[4],topn),
               "showarrow": False,
               "xref": "paper",
               "yref": "paper",
               "x": 0.87,
               "y": 0.95}

# Legend for the marker-size bands, shown in the bottom-left corner of the
# figure: four lines, each giving the value range of one band of the size
# column (index 7).
legend_values = df[colnames[7]]
legend_median = legend_values.median()
legend_lower_mid = legend_values[legend_values < legend_median].median()
legend_upper_mid = legend_values[legend_values > legend_median].median()

# Band edges from smallest to largest, rounded to 4 decimals for display.
band_edges = [
    round(edge, 4)
    for edge in (
        legend_values.min(),
        legend_lower_mid,
        legend_median,
        legend_upper_mid,
        legend_values.max(),
    )
]

# Each template is padded to 30 characters *before* the numbers are filled in.
band_templates = [
    "0%%-25%%:%s-%s",
    "25%%-50%%:%s-%s",
    "50%%-75%%:%s-%s",
    "75%%-100%%:%s-%s",
]
band_lines = [
    template.ljust(30) % (low, high)
    for template, low, high in zip(band_templates, band_edges[:-1], band_edges[1:])
]

annotations2 = {
    "text": "<br>".join(band_lines),
    "showarrow": False,
    "xref": "paper",
    "yref": "paper",
    "x": 0,
    "y": 0,
}

# Attach both captions to the layout, assemble the figure and send it to
# Plotly for inline display under the given file name.
layout.update(annotations=[annotations, annotations2])

fig = Figure(layout=layout, data=data)
py.iplot(fig, filename="Mixed Subplots Volcano")


Out[127]:

In [ ]: