In [28]:
%pylab inline
plot(arange(5))


Populating the interactive namespace from numpy and matplotlib
Out[28]:
[<matplotlib.lines.Line2D at 0x10b33d390>]

In [11]:
%pylab inline
plot(arange(5))


Populating the interactive namespace from numpy and matplotlib
Out[11]:
[<matplotlib.lines.Line2D at 0x106f8ee10>]

In [29]:
import pandas as pd
import numpy as np
import matplotlib as plt

dataframe = pd.read_csv("/Users/potty/Documents/GitHub/uip-pc4/07.Data.Science/Ejemplo/train.csv")

In [6]:
dataframe.head(5)


Out[6]:
Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area Loan_Status
0 LP001002 Male No 0 Graduate No 5849 0.0 NaN 360.0 1.0 Urban Y
1 LP001003 Male Yes 1 Graduate No 4583 1508.0 128.0 360.0 1.0 Rural N
2 LP001005 Male Yes 0 Graduate Yes 3000 0.0 66.0 360.0 1.0 Urban Y
3 LP001006 Male Yes 0 Not Graduate No 2583 2358.0 120.0 360.0 1.0 Urban Y
4 LP001008 Male No 0 Graduate No 6000 0.0 141.0 360.0 1.0 Urban Y

In [7]:
dataframe.describe()


Out[7]:
ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History
count 614.000000 614.000000 592.000000 600.00000 564.000000
mean 5403.459283 1621.245798 146.412162 342.00000 0.842199
std 6109.041673 2926.248369 85.587325 65.12041 0.364878
min 150.000000 0.000000 9.000000 12.00000 0.000000
25% 2877.500000 0.000000 100.000000 360.00000 1.000000
50% 3812.500000 1188.500000 128.000000 360.00000 1.000000
75% 5795.000000 2297.250000 168.000000 360.00000 1.000000
max 81000.000000 41667.000000 700.000000 480.00000 1.000000

In [8]:
dataframe['Property_Area'].value_counts()


Out[8]:
Semiurban    233
Urban        202
Rural        179
Name: Property_Area, dtype: int64

In [9]:
dataframe['Gender'].value_counts()


Out[9]:
Male      489
Female    112
Name: Gender, dtype: int64

In [22]:
dataframe['ApplicantIncome'].hist()


Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x1090ba978>

In [21]:
dataframe.boxplot(column='ApplicantIncome')


Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x10923c0f0>

In [17]:
import plotly.plotly as py
import plotly.figure_factory as ff

In [18]:
tabla = ff.create_table(dataframe)

In [19]:
py.iplot(tabla)


Aw, snap! We don't have an account for ''. Want to try again? You can authenticate with your email address or username. Sign in is not case sensitive.

Don't have an account? plot.ly

Questions? support@plot.ly
---------------------------------------------------------------------------
PlotlyError                               Traceback (most recent call last)
<ipython-input-19-960144c2bb2e> in <module>()
----> 1 py.iplot(tabla)

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plotly/plotly/plotly.py in iplot(figure_or_data, **plot_options)
    162         embed_options['height'] = str(embed_options['height']) + 'px'
    163 
--> 164     return tools.embed(url, **embed_options)
    165 
    166 

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plotly/tools.py in embed(file_owner_or_url, file_id, width, height)
    388         else:
    389             url = file_owner_or_url
--> 390         return PlotlyDisplay(url, width, height)
    391     else:
    392         if (get_config_defaults()['plotly_domain']

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plotly/tools.py in __init__(self, url, width, height)
   1363         def __init__(self, url, width, height):
   1364             self.resource = url
-> 1365             self.embed_code = get_embed(url, width=width, height=height)
   1366             super(PlotlyDisplay, self).__init__(data=self.embed_code)
   1367 

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plotly/tools.py in get_embed(file_owner_or_url, file_id, width, height)
    293                 "'{1}'."
    294                 "\nRun help on this function for more information."
--> 295                 "".format(url, plotly_rest_url))
    296         urlsplit = six.moves.urllib.parse.urlparse(url)
    297         file_owner = urlsplit.path.split('/')[1].split('~')[1]

PlotlyError: Because you didn't supply a 'file_id' in the call, we're assuming you're trying to snag a figure from a url. You supplied the url, '', we expected it to start with 'https://plot.ly'.
Run help on this function for more information.

In [30]:
%matplotlib inline

In [23]:
dataframe.boxplot(column='ApplicantIncome', by='Gender')


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b7f0e80>

In [25]:
dataframe.boxplot(column='ApplicantIncome', by='Married')


Out[25]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b2fba58>

In [31]:
temporal1 = dataframe['Credit_History'].value_counts(ascending=True)
temporal2 = dataframe.pivot_table(values='Loan_Status', index=['Credit_History'], aggfunc=lambda x: x.map({'Y':1, 'N':0}).mean())
print("Tabla de Frecuencia de Historial Crediticio")
print(temporal1)
print("Tabla de Probabilidad de Obtener un Prestamo por cada clas de Historial de Credito")
print(temporal2)


Tabla de Frecuencia de Historial Crediticio
0.0     89
1.0    475
Name: Credit_History, dtype: int64
Tabla de Probabilidad de Obtener un Prestamo por cada clas de Historial de Credito
                Loan_Status
Credit_History             
0.0                0.078652
1.0                0.795789

In [39]:
import matplotlib.pyplot as plt
figura = plt.figure(figsize=(8,4))
eje1 = figura.add_subplot(121)
eje1.set_xlabel('Historial de Credito')
eje1.set_ylabel('Conteo de Aplicantes')
eje1.set_title('Aplicantes por Historial de Credito')
temporal1.plot(kind='bar')

eje2 = figura.add_subplot(121)
eje2.set_xlabel('Historial de Credito')
eje2.set_ylabel('Probabilidad de Obtener Prestamo')
eje2.set_title('Probabilidad de Obtener Prestamos por Historial Crediticio')
temporal2.plot(kind='bar')


/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/matplotlib/cbook/deprecation.py:106: MatplotlibDeprecationWarning:

Adding an axes using the same arguments as a previous axes currently reuses the earlier instance.  In a future version, a new instance will always be created and returned.  Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.

Out[39]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ca4c3c8>

In [ ]:
dataframe.apply(lambda x: sum(x.isnull(),axis=0))