In [3]:
import plotly
import plotly.plotly as py
from plotly.graph_objs import *
import pandas as pd
import math
from IPython.display import Image
# Plotly credentials are read from the environment so that the API key is never
# stored in (or committed with) the notebook.  Export PLOTLY_USERNAME and
# PLOTLY_API_KEY before starting the kernel; a missing variable raises KeyError
# here instead of failing later at upload time.
# NOTE(review): a key that was previously hard-coded on this line should be
# treated as leaked and regenerated in the plotly account settings.
import os

plotly.tools.set_credentials_file(
    username=os.environ["PLOTLY_USERNAME"],
    api_key=os.environ["PLOTLY_API_KEY"],
)
# ASSUMPTIONS:
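# 1. Data-set structure (columns in this order): NAME, DATE, LATITUDE, LONGITUDE, CATEGORY, QUAN1, QUAN2, QUAN3, ..., QUANn.
# 2. SIZE: map marker size is bucketed into four levels from the values of the column at index 7.
# 3. CATEGORY: the top 10 most frequent categories each get their own colour; all others share one grey.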
In [127]:
# Load the sample data set.  Columns are addressed by position throughout this
# cell, so the column order of the CSV matters.
# (Alternative sample file used during development: 't3sample.csv'.)
df = pd.read_csv('Q3-moreCtgSample.csv', encoding='iso-8859-1')
colnames = df.columns.values

# Frequency table of the category column (index 4), most frequent first.
# Column 'x' holds the category label; the second column keeps the original
# column name and holds the row count.
# The columns are named explicitly instead of relying on
# `value_counts().reset_index()` producing a column called 'index': that default
# name differs between pandas versions, and with the newer naming the 'x' column
# would silently never be created.
category_counts = df[colnames[4]].value_counts()
freq = pd.DataFrame(
    {'x': category_counts.index, colnames[4]: category_counts.values},
    columns=['x', colnames[4]],
)
freqcol = freq.columns.values
# Palette: one distinct colour for each of the 10 most frequent categories,
# plus a final grey (index 10) shared by every other category.
colorbar = ["#FF0000", "#FF8000", "#FFFF00", "#00FF00", "#00FFFF", "#0080FF",
            "#0000FF", "#7F00FF", "#FF00FF", "#FF007F", "#C0C0C0"]

# Map each category label to its colour by frequency rank (freq is ordered most
# frequent first): ranks 0-9 get their own colour, everything else is grey.
category_color = {
    category: (colorbar[rank] if rank < 10 else colorbar[10])
    for rank, category in enumerate(freq["x"])
}

# One colour per data row, in row order.  Rows whose category is not in the
# frequency table (e.g. missing values, which value_counts() drops) fall back
# to grey so that the list always stays aligned with the rows of df; skipping
# them would shift every later colour onto the wrong marker.
colorbars = [category_color.get(category, colorbar[10])
             for category in df[colnames[4]]]
# Marker sizes: bucket every row into one of four sizes using the values of
# the column at index 7.  The three cut points are the overall median and the
# medians of the values strictly above / strictly below it.  They are computed
# once up front; recomputing them for every row made the loop quadratic.
size_values = df[colnames[7]]
size_median = size_values.median()
size_upper_cut = size_values[size_values > size_median].median()
size_lower_cut = size_values[size_values < size_median].median()


def _marker_size(value):
    """Return the marker size (3, 6, 9 or 15) for one value of the size column.

    Comparisons are strict, so a value equal to a cut point lands in the lower
    bucket; a NaN value compares False everywhere and gets the smallest size.
    """
    if value > size_median:
        return 15 if value > size_upper_cut else 9
    return 6 if value > size_lower_cut else 3


# One size per data row, in row order.
sizes = [_marker_size(value) for value in size_values]
# Bar chart data: the `topn` most frequent categories plus, when there are more
# categories than that, a single "Other" bar holding the sum of the rest.
topn = 10
top_labels = list(freq[freqcol[0]].iloc[:topn])
top_counts = list(freq[freqcol[1]].iloc[:topn])
if len(freq) > topn:
    top_labels.append("Other")
    top_counts.append(freq[freqcol[1]].iloc[topn:].sum())

# Build the table explicitly rather than writing into a slice of `freq`:
# this leaves `freq` untouched and does not depend on DataFrame.set_value.
topfreq = pd.DataFrame(
    {freqcol[0]: top_labels, freqcol[1]: top_counts},
    columns=list(freqcol),
)

# Both axes come from `topfreq` so that labels and heights have the same length
# and the "Other" bar really shows the summed count.
locations = Bar(
    x=topfreq[freqcol[0]],
    y=topfreq[freqcol[1]],
    marker=dict(color=colorbar),
)
# read in 3d volcano surface data
#df_v = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/volcano.csv')
# plot(2) 3d surface of volcano
#threed = Surface(z=df_v.values.tolist(), colorscale='Reds', showscale=False)
#logsizes=[]
#for i in range(len(df[colnames[4]])):
# logsize=math.log10(float(df[colnames[5]][i]))
# logsize=logsize*1.5
# logsizes.append(logsize)
# Scatter-geo trace: one marker per data row, positioned by the latitude
# (column 2) and longitude (column 3) columns and drawn on the "geo3" subplot.
# Marker size and colour are the per-row lists computed earlier in this cell.
marker_style = dict(
    size=sizes,
    opacity=0.8,
    color=colorbars,
    colorscale='Viridis',
)
trace3 = dict(
    geo="geo3",
    lon=df[colnames[3]],
    lat=df[colnames[2]],
    hoverinfo='text',
    marker=marker_style,
    mode="markers",
    type="scattergeo",
)

# Traces shown in the figure: the bar chart first, then the map.
data = Data([locations, trace3])
# Figure layout.  Subplot placement is controlled through the `domain` entries
# of 'geo3', 'scene' and the x/y axes: the globe takes the left part of the
# canvas, the bar chart the right part.  Everything is drawn on a black
# background with white text.
# NOTE(review): the title is built from column 5 while the top-N ranking in this
# cell is computed on column 4 -- confirm which column name is meant here.
figure_title = "<br>Distribution of \"%s\" (top%s)" % (colnames[5], topn)

# Orthographic world globe used by the scatter-geo trace.
geo_panel = {
    "domain": {"x": [0, 0.55], "y": [0.18, 0.9]},
    "lakecolor": "rgba(127,205,255,1)",
    "oceancolor": "rgb(6,66,115)",
    "landcolor": 'white',
    "projection": {"type": "orthographic"},
    "scope": "world",
    "showlakes": True,
    "showocean": True,
    "showland": True,
    "bgcolor": 'black',
}

# 3D scene settings; no trace added to `data` in this cell targets this scene.
scene_panel = {
    "domain": {"x": [0.5, 1], "y": [0, 0.55]},
    "xaxis": {"gridcolor": 'white'},
    "yaxis": {"gridcolor": 'white'},
    "zaxis": {"gridcolor": 'white'},
}

layout = {
    "plot_bgcolor": 'black',
    "paper_bgcolor": 'black',
    "titlefont": {"size": 20, "family": "Raleway"},
    "font": {"color": 'white'},
    "dragmode": "zoom",
    "geo3": geo_panel,
    "margin": {"r": 10, "t": 25, "b": 40, "l": 60},
    "scene": scene_panel,
    "showlegend": False,
    "title": figure_title,
    # Cartesian axes of the bar chart.
    "xaxis": {"anchor": "y", "domain": [0.6, 0.95]},
    "yaxis": {"anchor": "x", "domain": [0.1, 0.9], "showgrid": False},
}
# Caption placed above the bar chart (paper coordinates).
# NOTE(review): the caption names column 5 while the bars count the categories
# of column 4 -- confirm which column name is meant here.
annotations = {
    "text": "Histogram of \"%s\" (top%s)" % (colnames[5], topn),
    "showarrow": False,
    "xref": "paper",
    "yref": "paper",
    "x": 0.87,
    "y": 0.95,
}

# Legend for the four marker sizes: the value range of each bucket of the size
# column (index 7).  The cut points are the overall median and the medians of
# the values strictly below / strictly above it, each computed once.
legend_values = df[colnames[7]]
legend_median = legend_values.median()
legend_lower_cut = legend_values[legend_values < legend_median].median()
legend_upper_cut = legend_values[legend_values > legend_median].median()
legend_bounds = [
    legend_values.min(),
    legend_lower_cut,
    legend_median,
    legend_upper_cut,
    legend_values.max(),
]
legend_labels = ["0%-25%", "25%-50%", "50%-75%", "75%-100%"]

# Pad each finished line to 30 characters; padding the format template before
# substitution (as was done previously) pads by the wrong amount.
legend_lines = [
    ("%s:%s-%s" % (label, round(low, 4), round(high, 4))).ljust(30)
    for label, low, high in zip(legend_labels, legend_bounds[:-1], legend_bounds[1:])
]

# Size legend, anchored at the bottom-left corner of the figure.
annotations2 = {
    "text": "<br>".join(legend_lines),
    "showarrow": False,
    "xref": "paper",
    "yref": "paper",
    "x": 0,
    "y": 0,
}

layout['annotations'] = [annotations, annotations2]
# Assemble the figure from the traces and the layout, then upload it to the
# plotly account under a fixed file name and render it inline in the notebook.
figure_parts = dict(data=data, layout=layout)
fig = Figure(**figure_parts)
py.iplot(fig, filename="Mixed Subplots Volcano")
Out[127]:
In [ ]: