In [1]:
ls
In [2]:
#Data Mine
#http://youtu.be/p8hle-ni-DM
#http://youtu.be/eRpFC2CKvao?list=PLyBBc46Y6aAz54aOUgKXXyTcEmpMisAq3
#Data = TestData.csv or <'Dataset.csv'>
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
# Set some Pandas options
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 25)
#Get Data
testdata=open('TestData.csv')
#Read csv
testdata=pd.read_csv('TestData.csv')
#View .head
testdata.head()
Out[2]:
In [3]:
# Show the last five rows of the loaded table (five is tail()'s default).
testdata.tail(5)
Out[3]:
In [4]:
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
# Set some Pandas options
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 25)
#Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
df
#Use .head and .tail; or df.describe()
df.head()
Out[4]:
In [5]:
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
#Load Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
df
#Create SimplePlot
figure()
df.plot()
xlabel('AccountNo_t')
ylabel('Amount')
title('AccountNo_t vs Amount')
show()
In [ ]:
from IPython.display import IFrame
# Embed the matplotlib lecture notebook in a 400x350 frame.
#http://nbviewer.ipython.org/github/jrjohansson/scientific-python-lectures/blob/master/Lecture-4-Matplotlib.ipynb
# IFrame builds the <iframe> element (with properly quoted attributes) instead
# of hand-assembling the HTML string.
IFrame("http://nbviewer.ipython.org/github/jrjohansson/scientific-python-lectures/blob/master/Lecture-4-Matplotlib.ipynb",
       width=400, height=350)
In [2]:
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
#Load Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
df
#Create Plot with matplotlib object-oriented API
#Declare 'fig1' variable assign 'plt.figure()' value
fig1 = plt.figure(figsize=(8,4), dpi=200)
df.plot()
xlabel('AccountNo')
ylabel('Amount')
title('AccountNo vs Amount')
plt.show()
plt.iplot(df_to_iplot(df))
#Save fig
fig1.savefig("fig1.png", dpi=200)
In [1]:
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
#Read Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
#Create Filter
df[df['Amount_t']<='10000']
#Create Plot with matplotlib object-oriented API
#Declare 'fig1' variable assign 'plt.figure()' value
fig1 = plt.figure(figsize=(8,4), dpi=200)
df.plot()
xlabel('AccountNo')
ylabel('Amount')
title('AccountNo vs Amount')
plt.show()
#Save fig
fig1.savefig("fig1.png", dpi=200)
In [ ]:
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
#Read Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
#Create Filter
df[df['Amount_t']<='10000']
#Declare DataFrame Variables
df['Amount_t'].describe()
Amount = ('Amount_t')
Volume = count('AccountNo_t')
grouping -df.groupby['AccountNo_t']
x = 'Volume'
y = 'Amount'
#Slicing
df.ix[0:, ['Volume', 'Amount']]
#Use .head and .tail
df.head()
In [3]:
#Search
# Return the rows for one account. The frame built by the earlier cells is
# named df (no variable called data is defined in this notebook), and indexing
# with the boolean mask yields the matching rows rather than a True/False
# column. Casting to str makes the comparison work whether AccountNo_t was
# parsed as text or as a number.
df[df['AccountNo_t'].astype(str) == '1020234']
In [1]:
#Scatter plot
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
#Read Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
#Declare 'fig2' variable assign 'plt.figure()' value
fig2 = plt.figure(figsize=(8,4), dpi=200)
#Create Random Scatter Plot
N = 50
x = np.random.rand(N)
y = np.random.rand(N)
colors = np.random.rand(N)
area = np.pi * (15 * np.random.rand(N))**2 # 0 to 15 point radiuses
plt.scatter(x, y, s=area, c=colors, alpha=0.5)
plt.show()
#Save fig
fig2.savefig("fig2.png", dpi=200)
In [1]:
#Scatter plot of (x, y) in red with the model's predictions drawn as a blue line
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from pylab import *
#Load Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe (keeps only the AccountNo_t and Amount_t columns)
# NOTE(review): df is built here but none of the plotting calls below read it.
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
# NOTE(review): x, y and regr are not assigned anywhere in this cell, so it
# only runs if an earlier-executed cell left them in the kernel. Presumably
# regr is a fitted model exposing predict() and x/y are its inputs/targets -
# confirm and define them here so the cell survives Restart & Run All.
# Plot outputs
pl.scatter(x, y, color='red')
pl.plot(x, regr.predict(x), color='blue', linewidth=3)
# Passing an empty tuple removes the tick marks and labels from each axis.
pl.xticks(())
pl.yticks(())
pl.show()
In [1]:
#http://nbviewer.ipython.org/gist/fonnesbeck/5850463
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
from sklearn import datasets, linear_model
# Set some Pandas options
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 25)
#Linear Regression
#Load Data Source
testdata=pd.read_csv('TestData.csv')
#Declare 'fig3' variable assign 'plt.figure()' value
fig3 = plt.figure(figsize=(8,4), dpi=200)
# Use only one feature
#Build Dataframe
aml = pd.DataFrame(data = testdata, columns=['Amount_t'])
aml_x = aml[:, np.newaxis]
aml_x_temp = aml_x[:, :, 2]
# Split the data into training/testing sets
aml_x_train = aml_x_temp[:-20]
aml_x_test = aml_x_temp[-20:]
from sklearn.datasets.samples_generator import make_regression
# this is our test set, it's just a straight line with some
# gaussian noise
X, Y = make_regression(n_samples=100, n_features=1, n_informative=1,\
random_state=0, noise=35)
# Split the targets into training/testing sets
aml_y_train = aml.target[:-20]
aml_y_test = aml.target[-20:]
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(aml_x_train, aml_x_train)
# The coefficients
print 'Coefficients: \n', regr.coef_
# The mean square error
print ("Residual sum of squares: %.2f" %
np.mean((regr.predict(aml_x_test) - aml_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print ('Variance score: %.2f' % regr.score(aml_x_test, aml_y_test))
# output = CTR[Amount_t['Total'] < 9999.99]
#output.plot(kind='bar')
#Save fig
fig3.savefig("fig3.png", dpi=200)
In [3]:
#http://nbviewer.ipython.org/gist/fonnesbeck/5850463
%matplotlib inline
import pandas as pd
import pylab
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
#Load Data Source
testdata=pd.read_csv('TestData.csv')
#Build Dataframe
df = pd.DataFrame(data = testdata, columns=['AccountNo_t', 'Amount_t'])
#Declare 'fig2' variable assign 'plt.figure()' value
fig4 = plt.figure(figsize=(8,4), dpi=200)
# Set some Pandas options
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 25)
with mpl.rc_context(rc={'font.family': 'serif', 'font.weight': 'bold', 'font.size': 8}):
fig = plt.figure(figsize=(6,3))
ax1 = fig.add_subplot(121)
ax1.set_xlabel('AccountNo')
ax1.set_ylabel('Amount')
ax1.set_title("Scatterplot")
plt.plot(np.random.normal(size=100), np.random.normal(size=100), 'r.')
ax2 = fig.add_subplot(122)
plt.hist(np.random.normal(size=100), bins=15)
ax2.set_xlabel('sample')
ax2.set_ylabel('cumulative sum')
ax2.set_title("Normal distrubution")
plt.tight_layout()
plt.savefig("fig4.png", dpi=200)
In [ ]:
# https://pypi.python.org/pypi/solrpy/
import solr
# Address of the Solr server used by this example.
# NOTE(review): hard-coded host; consider reading it from configuration.
SOLR_URL = 'http://50.255.26.173:8983/solr'
# create a connection to a solr server
s = solr.SolrConnection(SOLR_URL)
try:
    # add a document to the index
    doc = dict(
        id=1,
        title='Lucene in Action',
        author=['Erik Hatcher', 'Otis Gospodnetić'],
    )
    s.add(doc, commit=True)
    # do a search
    response = s.query('title:lucene')
    for hit in response.results:
        # Parenthesised single-argument print runs on Python 2 and Python 3.
        print(hit['title'])
finally:
    # Release the HTTP connection even if indexing or the query fails.
    s.close()
In [ ]:
from IPython.display import IFrame
# Embed the Common Logic SourceForge project page in a 400x350 frame.
# http://sourceforge.net/projects/commonlogic/
# IFrame builds the <iframe> element (with properly quoted attributes) instead
# of hand-assembling the HTML string.
IFrame("http://sourceforge.net/projects/commonlogic/", width=400, height=350)
In [ ]:
from IPython.display import IFrame
# Embed the Common Logic project site in a 400x350 frame.
# http://commonlogic.sourceforge.net/
# IFrame builds the <iframe> element (with properly quoted attributes) instead
# of hand-assembling the HTML string.
IFrame("http://commonlogic.sourceforge.net/", width=400, height=350)
In [ ]:
%matplotlib inline
import pandas as pd
import pylab as pl
import matplotlib.pyplot as plt
import numpy as np
from pandas import *
# Set some Pandas options
pd.set_option('display.notebook_repr_html', False)
pd.set_option('display.max_columns', 20)
pd.set_option('display.max_rows', 25)
plt.plot(np.random.normal(size=100), np.random.normal(size=100), 'ro')
In [ ]:
from IPython.display import IFrame
# Embed the referenced nbviewer gist in a 400x350 frame.
#http://nbviewer.ipython.org/gist/nipunreddevil/7734529
# IFrame builds the <iframe> element (with properly quoted attributes) instead
# of hand-assembling the HTML string.
IFrame("http://nbviewer.ipython.org/gist/nipunreddevil/7734529", width=400, height=350)