In [3]:
#%matplotlib inline
import numpy as np
from sklearn.datasets import fetch_mldata
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import preprocessing
from sklearn.metrics import classification_report
from sklearn.cross_validation import cross_val_score
from sklearn import svm
import scipy
import pandas as pd
from sklearn import linear_model 

# Display options: float output precision of 4 and a 120-character console width.
# 'display.precision' is the fully qualified option key (the bare 'precision'
# abbreviation is ambiguous on newer pandas releases).
pd.set_option('display.precision', 4)
pd.set_option('display.width', 120)

# Download the abalone data set (requires network access).
# NOTE(review): the recorded run of this cell failed on this call with
# "HTTP Error 500: Internal Server Error"; if the service stays unavailable the
# data has to be loaded from another source.
abalone = fetch_mldata('abalone')

# Feature matrix as a labelled frame, one column per measurement.
abalone_df = pd.DataFrame(abalone.data, columns=['Sex','Length','Diameter','Height','Whole','Shucked','Viscera','Shell'])
#del abalone_df['Sex']

# Target values as a plain list; referenced by the (currently commented-out)
# colouring code in later cells.
groups = list(abalone.target)
# Attach the target row by row. The previous code evaluated
# abalone.target[k] for every target value k, i.e. it used the target values
# themselves as positions into the target array.
abalone_df['ring'] = abalone.target
# Target on its own as a single-column frame, used by the regression plots.
df_y = pd.DataFrame(abalone.target, columns=['Ring'])
#print (max(abalone.target), min(abalone.target))
print("Describe the abalone data")
print(abalone_df.head())



HTTPErrorTraceback (most recent call last)
<ipython-input-3-ec386496bd19> in <module>()
     15 pd.set_option('display.width',120)
     16 
---> 17 abalone = fetch_mldata('abalone')
     18 
     19 abalone_df = pd.DataFrame(abalone.data, columns=['Sex','Length','Diameter','Height','Whole','Shucked','Viscera','Shell'])

/usr/local/lib/python2.7/dist-packages/sklearn/datasets/mldata.pyc in fetch_mldata(dataname, target_name, data_name, transpose_data, data_home)
    140         urlname = MLDATA_BASE_URL % quote(dataname)
    141         try:
--> 142             mldata_url = urlopen(urlname)
    143         except HTTPError as e:
    144             if e.code == 404:

/usr/lib/python2.7/urllib2.pyc in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    152     else:
    153         opener = _opener
--> 154     return opener.open(url, data, timeout)
    155 
    156 def install_opener(opener):

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    435         for processor in self.process_response.get(protocol, []):
    436             meth = getattr(processor, meth_name)
--> 437             response = meth(req, response)
    438 
    439         return response

/usr/lib/python2.7/urllib2.pyc in http_response(self, request, response)
    548         if not (200 <= code < 300):
    549             response = self.parent.error(
--> 550                 'http', request, response, code, msg, hdrs)
    551 
    552         return response

/usr/lib/python2.7/urllib2.pyc in error(self, proto, *args)
    467             http_err = 0
    468         args = (dict, proto, meth_name) + args
--> 469         result = self._call_chain(*args)
    470         if result:
    471             return result

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    407             func = getattr(handler, meth_name)
    408 
--> 409             result = func(*args)
    410             if result is not None:
    411                 return result

/usr/lib/python2.7/urllib2.pyc in http_error_302(self, req, fp, code, msg, headers)
    654         fp.close()
    655 
--> 656         return self.parent.open(new, timeout=req.timeout)
    657 
    658     http_error_301 = http_error_303 = http_error_307 = http_error_302

/usr/lib/python2.7/urllib2.pyc in open(self, fullurl, data, timeout)
    435         for processor in self.process_response.get(protocol, []):
    436             meth = getattr(processor, meth_name)
--> 437             response = meth(req, response)
    438 
    439         return response

/usr/lib/python2.7/urllib2.pyc in http_response(self, request, response)
    548         if not (200 <= code < 300):
    549             response = self.parent.error(
--> 550                 'http', request, response, code, msg, hdrs)
    551 
    552         return response

/usr/lib/python2.7/urllib2.pyc in error(self, proto, *args)
    473         if http_err:
    474             args = (dict, 'default', 'http_error_default') + orig_args
--> 475             return self._call_chain(*args)
    476 
    477 # XXX probably also want an abstract factory that knows when it makes

/usr/lib/python2.7/urllib2.pyc in _call_chain(self, chain, kind, meth_name, *args)
    407             func = getattr(handler, meth_name)
    408 
--> 409             result = func(*args)
    410             if result is not None:
    411                 return result

/usr/lib/python2.7/urllib2.pyc in http_error_default(self, req, fp, code, msg, hdrs)
    556 class HTTPDefaultErrorHandler(BaseHandler):
    557     def http_error_default(self, req, fp, code, msg, hdrs):
--> 558         raise HTTPError(req.get_full_url(), code, msg, hdrs, fp)
    559 
    560 class HTTPRedirectHandler(BaseHandler):

HTTPError: HTTP Error 500: Internal Server Error

In [ ]:
# Box plot across the columns of the frame.
boxplots = abalone_df.boxplot(return_type='axes')

# Box plot of 'Length' only, grouped by the 'ring' column.
# This rebinds `boxplots`, so only this second result stays referenced.
boxplots = abalone_df.boxplot(
    by='ring',
    column='Length',
    return_type='axes')

# Density curves for the frame's columns.
densityplot = abalone_df.plot(kind='density')

In [ ]:
#single_distribution = abalone_df['Length'].plot(kind='hist',alpha=0.5)
# Manual colour table for group values 0-9; only referenced by the
# commented-out line below.
colors_palette = {0:"red",1: "green",2:"blue",3:"orange",4:"orange",5:"orange",6:"orange",7:"orange",8:"orange",9:"orange"}
#colors = [colors_palette[c] for c in groups]
# Handles to the 'jet' colormap; not used by the plot below.
jet = cm = plt.get_cmap('jet')
# Scatter of column 1 ('Length') against column 3 ('Height').
# A colormap only takes effect when per-point colour values are supplied, so
# the points are coloured by the 'ring' column; without `c` the 'terrain'
# colormap was silently ignored.
simple_scatterplot = abalone_df.plot(kind='scatter', x=1, y=3, c='ring', colormap='terrain')

In [ ]:
# Hexagonal-bin plot of column 1 against column 3 of abalone_df
# ('Length' and 'Height' in the column list the frame was built with).
hexbin = abalone_df.plot(
    kind='hexbin',
    x=1,
    y=3,
    gridsize=10)

In [ ]:
# scatter_matrix is exposed from pandas.plotting on current pandas releases;
# older releases only provide it under pandas.tools.plotting, so fall back.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

# Pairwise scatter plots of all columns, with a kernel-density estimate on the
# diagonal instead of a histogram.
matrix_of_scatterplots = scatter_matrix(abalone_df, alpha=0.2, figsize=(20, 20), diagonal='kde')

In [ ]:
# parallel_coordinates is exposed from pandas.plotting on current pandas
# releases; older releases only provide it under pandas.tools.plotting.
try:
    from pandas.plotting import parallel_coordinates
except ImportError:
    from pandas.tools.plotting import parallel_coordinates

# Parallel-coordinates plot with lines coloured by the 'ring' column.
pl1 = parallel_coordinates(abalone_df, 'ring')

In [ ]:
# train_test_split lives in sklearn.model_selection on current scikit-learn;
# fall back to the legacy sklearn.cross_validation module on old installs.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Use the measurement columns only. abalone_df also carries a 'ring' column
# filled from the target, and feeding it to the classifier would leak the
# label into the features.
X_train, X_test, y_train, y_test = train_test_split(
    abalone_df.drop('ring', axis=1).values,
    abalone.target,
    test_size=0.20,
    random_state=101)

C = 1 # SVM regularization parameter
# RBF-kernel support vector classifier. `degree` only applies to the 'poly'
# kernel, so the former degree=2 argument had no effect and is omitted.
clf = svm.SVC(kernel='rbf', max_iter=-1, C=C, random_state=101)

print(type(y_train), y_train.shape)

clf.fit(X_train, y_train)

#scores = cross_val_score(clf,X_train,y_train,cv=2,scoring='accuracy',n_jobs=-1)

#print ("svc mean=%0.3f std=%0.3f" % (np.mean(scores),np.std(scores)))

# Evaluate on the held-out 20% split.
y_pred = clf.predict(X_test)

#print("mean score: ", clf.score(X_test, y_test))
print(classification_report(y_test,y_pred))

In [ ]:
# Toy example: three samples with two identical features each, target 0..2.
x = [[0, 0], [1, 1], [2, 2]]
y = [0,1,2]
# Show type and length of both inputs (the second value used to be len(y)
# again instead of len(x)).
print(type(x), len(x), type(y), len(y))

from sklearn import linear_model 

# Ordinary least squares fit on the toy data; print the learned coefficients.
regressor = linear_model.LinearRegression(normalize=True)
regressor.fit (x, y)
print(regressor.coef_)


# Echo the input rows one per line.
for row in x:
    print(row)

In [ ]:
# Two records with different key sets: only the second one has a 'c' entry.
data2 = [
    {'a': 1, 'b': 2},
    {'a': 5, 'b': 10, 'c': 20},
]
print(data2)
# Build a frame restricted to columns 'a' and 'b'; as the last expression of
# the cell it is what the notebook displays.
pd.DataFrame(data=data2, columns=['a', 'b'])

In [ ]:
# scatter_matrix is exposed from pandas.plotting on current pandas releases;
# older releases only provide it under pandas.tools.plotting, so fall back.
try:
    from pandas.plotting import scatter_matrix
except ImportError:
    from pandas.tools.plotting import scatter_matrix

# Colour lookup for group values 0, 1 and 2; only referenced by the
# commented-out lines below.
colors_palette = {0:"red",1: "green",2:"blue"}
#colors = [colors_palette[c] for c in groups]
# NOTE(review): the disabled call below passes `pd` (the pandas module) where
# a DataFrame is expected - fix the argument before re-enabling it.
#matrix_of_scatterplots = scatter_matrix(pd,alpha=0.2,figsize=(8,8),diagonal='kde')

In [ ]:
print("Plot the data in 2 dimension for data visualization")
regressor = linear_model.LinearRegression()
plt.figure(figsize=(15,10))
# One panel per feature column 1..7 (column 0, 'Sex', is skipped): scatter of
# the feature against the target plus a single-feature linear fit.
# NOTE(review): this cell used to read `d` and `X`, which are not defined
# anywhere in the notebook; they are assumed to have been the fetched data set
# and its data matrix, so `abalone` is used directly - confirm.
for i in range(1, 8):
    ax = plt.subplot(3, 3, i)
    # Column vector for scikit-learn; -1 infers the row count instead of
    # hard-coding the data set size.
    feature = abalone.data[:, i].reshape(-1, 1)
    ax.scatter(abalone.data[:, i], df_y, color='black')
    regressor.fit(feature, abalone.target)
    y_pred = regressor.predict(feature)

    # Draw the fitted line on the same panel as the scatter.
    ax.plot(feature, y_pred)
    ax.set_xlabel(abalone_df.columns[i])
plt.show()

In [ ]:


In [ ]: