In [7]:
lista = [1, 2, 3, 4, 5]

In [8]:
lista[1:3]


Out[8]:
[2, 3]

In [9]:
a, b = lista[1:3]
a, b


Out[9]:
(2, 3)

In [10]:
len(lista)


Out[10]:
5

In [11]:
sum(lista)


Out[11]:
15

In [12]:
5 in lista


Out[12]:
True

In [13]:
7 in lista


Out[13]:
False

In [14]:
lista.append(7)
lista.append(6)
lista.append(7)
lista


Out[14]:
[1, 2, 3, 4, 5, 7, 6, 7]

In [15]:
lista.remove(7)
lista


Out[15]:
[1, 2, 3, 4, 5, 6, 7]

In [16]:
lista.extend([8, 9, -10])
lista


Out[16]:
[1, 2, 3, 4, 5, 6, 7, 8, 9, -10]

In [17]:
lista2 = lista
lista2[-1] = 10
print(lista)
print(lista2)


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [19]:
lista3 = lista[:]
lista3[-1] = 10
print(lista)
print(lista3)


[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [20]:
lista[-1] = -11

In [21]:
lista


Out[21]:
[1, 2, 3, 4, 5, 6, 7, 8, 9, -11]

In [22]:
lista3 = lista[:]
lista3[-1] = 10
print(lista)
print(lista3)


[1, 2, 3, 4, 5, 6, 7, 8, 9, -11]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [23]:
range(5)


Out[23]:
range(0, 5)

In [24]:
range(5)


Out[24]:
range(0, 5)

In [25]:
range(5)


Out[25]:
range(0, 5)

In [26]:
range(5, 0, -1)


Out[26]:
range(5, 0, -1)

In [27]:
print(range(5))


range(0, 5)

In [28]:
lista4 = [-4, 1, -2, 3]
print(sorted(lista4)) # ordena sem alterar a lista
print(sorted(lista4, reverse=True))
print(sorted(lista4, key=abs))
print(lista4)


[-4, -2, 1, 3]
[3, 1, -2, -4]
[1, -2, 3, -4]
[-4, 1, -2, 3]

In [29]:
lista4.sort()
print(lista4)


[-4, -2, 1, 3]

In [30]:
a = ['a', 'casa', 'está', 'muito', 'mal', 'organizada']
print(' '.join(a))


a casa está muito mal organizada

In [31]:
print(''.join(a))


acasaestámuitomalorganizada

In [32]:
import random
random.shuffle(a)
print(a)
random.shuffle(a)
print(a)


['está', 'muito', 'a', 'mal', 'organizada', 'casa']
['está', 'a', 'muito', 'casa', 'organizada', 'mal']

In [33]:
random.choice(a)


Out[33]:
'muito'

In [34]:
dicionario = {'nome': 'Neto', 'sobrenome': 'Deolino'}
print(dicionario['nome'])
print(dicionario['sobrenome'])
print(dicionario)


Neto
Deolino
{'nome': 'Neto', 'sobrenome': 'Deolino'}

In [35]:
dicionario.keys()


Out[35]:
dict_keys(['nome', 'sobrenome'])

In [36]:
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
list3 = zip(list1, list2)  # PACK
print(list3)
list4, list5 = zip(*list3)  # UNPACK
print(list4)
print(list5)


<zip object at 0x7f3b306e5048>
('a', 'b', 'c')
(1, 2, 3)

In [37]:
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
list3 = zip(list1, list2)
print(list3)


<zip object at 0x7f3b306e55c8>

In [38]:
valor = 99
if valor == 99:
    print 'veloz'
elif value > 200:
    print 'muito veloz'
else:
    print 'Que velocidade é esse meu fi?'


  File "<ipython-input-38-e63fd88764ac>", line 3
    print 'veloz'
                ^
SyntaxError: Missing parentheses in call to 'print'

In [39]:
valor = 99
if valor == 99:
    print ('veloz')
elif value > 200:
    print ('muito veloz')
else:
    print ('Que velocidade é esse meu fi?')


veloz

In [40]:
a = [1, 3, 5, 7, 9]
for i in a:
    print(i)


1
3
5
7
9

In [41]:
for i, valor in enumerate(a):
    print(i, valor)


0 1
1 3
2 5
3 7
4 9

In [43]:
def soma(a, b):
    return a + b

print (soma(2, 2))
print (soma(3, 2))


4
5

In [44]:
import numpy as np

In [45]:
lista = [1, 2, 3]
array1 = np.array(lista)
print(lista)
print(array1)
print(array1.shape)
print(type(array1))
print(type(lista))


[1, 2, 3]
[1 2 3]
(3,)
<class 'numpy.ndarray'>
<class 'list'>

In [46]:
lista2 = [[1, 2, 3], [3, 4, 5]]
array2 = np.array(lista2)
print(array2)
print('linha 1: {}'.format(array2[0]))
print('média: {}'.format(np.average(array2)))


[[1 2 3]
 [3 4 5]]
linha 1: [1 2 3]
média: 3.0

In [47]:
array1 + array2


Out[47]:
array([[2, 4, 6],
       [4, 6, 8]])

In [48]:
d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [49]:
print(d)


[[1 2 3]
 [4 5 6]
 [7 8 9]]

In [50]:
d[:, 0]


Out[50]:
array([1, 4, 7])

In [51]:
%matplotlib inline
import matplotlib.pyplot as plt

In [52]:
y = [1, 7, 3, 5, 12]
x = [1, 2, 3, 4, 5]
plt.plot(x, y, marker='o');



In [53]:
plt.grid()



In [54]:
plt.plot(x, y, marker='o');
plt.grid()



In [55]:
plt.scatter(x, y, marker='x');



In [56]:
%config InlineBackend.figure_format = 'pdf'
plt.bar(x, y);


<matplotlib.figure.Figure at 0x7f3b30137a90>

In [57]:
anos = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
pib = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]
plt.plot(anos, pib, marker='o')
plt.title('PIB')
plt.xlabel('Ano')
plt.ylabel(u'Bilhões de R$')
plt.grid()


<matplotlib.figure.Figure at 0x7f3b0793c630>

In [58]:
%config InlineBackend.figure_format = 'png'

In [59]:
anos = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
pib = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]
plt.plot(anos, pib, marker='o')
plt.title('PIB')
plt.xlabel('Ano')
plt.ylabel(u'Bilhões de R$')
plt.grid()



In [60]:
%matplotlib inline
import numpy as np
import pandas as pd

In [61]:
s1 = pd.Series([2, 7, 5, 1, 4])
print(s1)


0    2
1    7
2    5
3    1
4    4
dtype: int64

In [62]:
lista = [2, 7, 5, 1, 4]
rotulos = ['a', 'b', 'c', 'd', 'e']
s2 = pd.Series(lista, index=rotulos)
print(s2)


a    2
b    7
c    5
d    1
e    4
dtype: int64

In [63]:
print(s1[2])
print(s2['c'])


5
5

In [64]:
s1.plot(marker='o', grid=True);



In [65]:
matriz = np.array([[1, 2, 3], [4, 5, 6]])
nomes_linhas = ['L1', 'L2']
nomes_cols = ['C1', 'C2', 'C3']

df = pd.DataFrame(matriz, index=nomes_linhas, columns=nomes_cols)
df


Out[65]:
C1 C2 C3
L1 1 2 3
L2 4 5 6

In [66]:
print(df)


    C1  C2  C3
L1   1   2   3
L2   4   5   6

In [67]:
print(df.to_latex())


\begin{tabular}{lrrr}
\toprule
{} &  C1 &  C2 &  C3 \\
\midrule
L1 &   1 &   2 &   3 \\
L2 &   4 &   5 &   6 \\
\bottomrule
\end{tabular}


In [68]:
df.plot(kind='bar', grid=True, title=u'Título');



In [69]:
Teste de base


  File "<ipython-input-69-1f16480e1fff>", line 1
    Teste de base
           ^
SyntaxError: invalid syntax

In [70]:
X = np.load('X.npy')
X_transf = np.load('X_transf.npy')
y = np.load('y.npy')

In [71]:
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [72]:
sklearn.__version__


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-72-a516729597cc> in <module>()
----> 1 sklearn.__version__

NameError: name 'sklearn' is not defined

In [73]:
import sklearn
sklearn.__version__


Out[73]:
'0.18.1'

In [74]:
num_folds = 10
seed = 7

model = LogisticRegression()

i = 0
kfold = model_selection.KFold(n_splits=num_folds, random_state=seed)
accuracy = np.zeros(num_folds)

for train_idx, test_idx in kfold.split(X):
    print(len(train_idx), len(test_idx))
    
    # TREINO
    model.fit(X[train_idx], y[train_idx])    
    
    # FAZER PREDIÇÃO A PARTIR DO MODELO TREINADO
    y_pred = model.predict(X[test_idx])
    
    # AVALIAR A TÉCNICA ATRAVÉS DA COMPARAÇÃO DO DADO REAL COM O DADO PREVISTO 
    accuracy[i] = accuracy_score(y[test_idx], y_pred)
    i += 1

print(accuracy)
print(np.average(accuracy), np.std(accuracy))


691 77
691 77
691 77
691 77
691 77
691 77
691 77
691 77
692 76
692 76
[ 0.7012987   0.81818182  0.74025974  0.71428571  0.77922078  0.75324675
  0.85714286  0.80519481  0.72368421  0.80263158]
0.76951469583 0.0484105192457

In [75]:
results = model_selection.cross_val_score(model, X, y, cv=kfold)
print(results)
print("Accuracy: {} {}".format(results.mean(), results.std()) )


[ 0.7012987   0.81818182  0.74025974  0.71428571  0.77922078  0.75324675
  0.85714286  0.80519481  0.72368421  0.80263158]
Accuracy: 0.7695146958304853 0.04841051924567195

In [76]:
results = model_selection.cross_val_score(model, X_transf, y, cv=kfold)
print(results)
print("Accuracy: {} {}".format(results.mean(), results.std()) )


[ 0.68831169  0.83116883  0.76623377  0.7012987   0.77922078  0.79220779
  0.84415584  0.83116883  0.76315789  0.80263158]
Accuracy: 0.7799555707450445 0.050088006076024816

In [78]:
from sklearn.ensemble import GradientBoostingClassifier

In [80]:
model3 = GradientBoostingClassifier(n_estimators=50, max_depth=3, learning_rate=0.05)
results = model_selection.cross_val_score(model3, X_transf, y, cv=kfold)
print(results)
print("Accuracy: {} {}".format(results.mean(), results.std()) )


[ 0.71428571  0.85714286  0.72727273  0.63636364  0.79220779  0.81818182
  0.81818182  0.85714286  0.73684211  0.78947368]
Accuracy: 0.7747095010252905 0.06661018830310401

In [81]:
from itertools import product

n_estimators_ = [20, 40, 60, 80, 100]
max_depth_ = [2, 3, 5]
learning_rate_ = [0.05, 0.1]

results = []
for ne, md, lr in product(n_estimators_, max_depth_, learning_rate_):
    print(ne, md, lr)
    model_ = GradientBoostingClassifier(n_estimators=ne, max_depth=md, learning_rate=lr)
    result = model_selection.cross_val_score(model3, X_transf, y, cv=kfold)
    result_ = { 'n_estimators': ne, 'max_depth': md, 'learning_rate': lr, 
                'accuracy': result.mean(), 'std': result.std() } 

    results.append(result_)


20 2 0.05
20 2 0.1
20 3 0.05
20 3 0.1
20 5 0.05
20 5 0.1
40 2 0.05
40 2 0.1
40 3 0.05
40 3 0.1
40 5 0.05
40 5 0.1
60 2 0.05
60 2 0.1
60 3 0.05
60 3 0.1
60 5 0.05
60 5 0.1
80 2 0.05
80 2 0.1
80 3 0.05
80 3 0.1
80 5 0.05
80 5 0.1
100 2 0.05
100 2 0.1
100 3 0.05
100 3 0.1
100 5 0.05
100 5 0.1

In [82]:
df = pd.DataFrame(results, columns=['n_estimators', 'max_depth', 'learning_rate', 'accuracy', 'std'])
df.sort_values(by='accuracy', ascending=False)


Out[82]:
n_estimators max_depth learning_rate accuracy std
0 20 2 0.05 0.774710 0.066610
14 60 3 0.05 0.774710 0.066610
28 100 5 0.05 0.774710 0.066610
27 100 3 0.10 0.774710 0.066610
26 100 3 0.05 0.774710 0.066610
25 100 2 0.10 0.774710 0.066610
23 80 5 0.10 0.774710 0.066610
22 80 5 0.05 0.774710 0.066610
21 80 3 0.10 0.774710 0.066610
20 80 3 0.05 0.774710 0.066610
19 80 2 0.10 0.774710 0.066610
18 80 2 0.05 0.774710 0.066610
17 60 5 0.10 0.774710 0.066610
16 60 5 0.05 0.774710 0.066610
15 60 3 0.10 0.774710 0.066610
12 60 2 0.05 0.774710 0.066610
11 40 5 0.10 0.774710 0.066610
10 40 5 0.05 0.774710 0.066610
8 40 3 0.05 0.774710 0.066610
7 40 2 0.10 0.774710 0.066610
6 40 2 0.05 0.774710 0.066610
5 20 5 0.10 0.774710 0.066610
4 20 5 0.05 0.774710 0.066610
3 20 3 0.10 0.774710 0.066610
2 20 3 0.05 0.774710 0.066610
29 100 5 0.10 0.774710 0.066610
1 20 2 0.10 0.773411 0.067890
13 60 2 0.10 0.773411 0.067641
9 40 3 0.10 0.773411 0.067641
24 100 2 0.05 0.773411 0.067890

In [ ]: