In [7]:
lista = [1, 2, 3, 4, 5]
In [8]:
lista[1:3]
Out[8]:
In [9]:
a, b = lista[1:3]
a, b
Out[9]:
In [10]:
len(lista)
Out[10]:
In [11]:
sum(lista)
Out[11]:
In [12]:
5 in lista
Out[12]:
In [13]:
7 in lista
Out[13]:
In [14]:
lista.append(7)
lista.append(6)
lista.append(7)
lista
Out[14]:
In [15]:
lista.remove(7)
lista
Out[15]:
In [16]:
lista.extend([8, 9, -10])
lista
Out[16]:
In [17]:
# Plain assignment does not copy: lista2 becomes a second name for the same list object.
lista2 = lista
# Writing through lista2 therefore also changes what lista shows.
lista2[-1] = 10
# Both prints display the same contents because there is only one list.
print(lista)
print(lista2)
In [19]:
# A full slice builds a new list object, so lista3 can be changed without touching lista.
lista3 = lista[:]
lista3[-1] = 10
# NOTE(review): if the aliasing cell above ran first, lista[-1] is already 10, so both
# prints look identical here and the copy is not visible — confirm execution order.
print(lista)
print(lista3)
In [20]:
lista[-1] = -11
In [21]:
lista
Out[21]:
In [22]:
# Copy-by-slice demo: lista3 is a separate list object built from lista's elements.
lista3 = lista[:]
# Only the copy is modified here.
lista3[-1] = 10
# Assuming the cell that sets lista[-1] = -11 ran just before, lista keeps -11 as its
# last element while lista3 ends in 10.
print(lista)
print(lista3)
In [23]:
range(5)
Out[23]:
In [24]:
range(5)
Out[24]:
In [25]:
range(5)
Out[25]:
In [26]:
range(5, 0, -1)
Out[26]:
In [27]:
print(range(5))
In [28]:
# sorted() always hands back a new list; lista4 itself is never reordered.
lista4 = [-4, 1, -2, 3]
print(
    sorted(lista4),                # ascending copy
    sorted(lista4, reverse=True),  # descending copy
    sorted(lista4, key=abs),       # ordered by absolute value
    lista4,                        # the original order is still intact
    sep='\n',
)
In [29]:
lista4.sort()
print(lista4)
In [30]:
a = ['a', 'casa', 'está', 'muito', 'mal', 'organizada']
print(' '.join(a))
In [31]:
print(''.join(a))
In [32]:
import random
random.shuffle(a)
print(a)
random.shuffle(a)
print(a)
In [33]:
random.choice(a)
Out[33]:
In [34]:
# Dictionary demo: look up two keys individually, then show the whole mapping.
dicionario = {
    'nome': 'Neto',
    'sobrenome': 'Deolino',
}
print(dicionario['nome'], dicionario['sobrenome'], dicionario, sep='\n')
In [35]:
dicionario.keys()
Out[35]:
In [36]:
# zip() pairs items position by position. In Python 3 it returns a one-shot iterator,
# so it is materialised with list() to make it printable and reusable after unpacking.
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
list3 = list(zip(list1, list2))  # PACK: [('a', 1), ('b', 2), ('c', 3)]
print(list3)
# UNPACK: zip(*pairs) transposes the pairs back into one tuple per original list.
list4, list5 = zip(*list3)
print(list4)
print(list5)
In [37]:
# Pair the two lists element by element. list() materialises the zip iterator so the
# print shows the actual pairs instead of an opaque "<zip object at ...>" repr.
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
list3 = list(zip(list1, list2))
print(list3)
In [38]:
# Classify a speed reading with an if / elif / else chain.
# print is called as a function (Python 3); the statement form is a SyntaxError there.
valor = 99
if valor == 99:
    print('veloz')
elif valor > 200:  # compares `valor`; the original tested the undefined name `value`
    print('muito veloz')
else:
    print('Que velocidade é esse meu fi?')
In [39]:
# Classify a speed reading with an if / elif / else chain.
valor = 99
if valor == 99:
    print('veloz')
# The original tested the undefined name `value`; it only went unnoticed because the
# first branch matches. Any other `valor` would have raised NameError here.
elif valor > 200:
    print('muito veloz')
else:
    print('Que velocidade é esse meu fi?')
In [40]:
a = [1, 3, 5, 7, 9]
for i in a:
print(i)
In [41]:
for i, valor in enumerate(a):
print(i, valor)
In [43]:
def soma(a, b):
    """Return ``a + b``.

    Works for any pair of operands that support the ``+`` operator.
    """
    total = a + b
    return total


# Two quick calls showing the function in use, one result per line.
print(soma(2, 2), soma(3, 2), sep='\n')
In [44]:
import numpy as np
In [45]:
# The same values held as a plain Python list and as a NumPy array.
lista = [1, 2, 3]
array1 = np.array(lista)
# Contents of both containers, followed by the array's shape tuple.
print(lista, array1, array1.shape, sep='\n')
# Types are reported array first, then list.
print(type(array1), type(lista), sep='\n')
In [46]:
# A list of two equal-length lists becomes a 2-row array; a single index selects a row.
lista2 = [[1, 2, 3], [3, 4, 5]]
array2 = np.array(lista2)
print(
    array2,
    'linha 1: {}'.format(array2[0]),
    'média: {}'.format(np.average(array2)),  # average over every element
    sep='\n',
)
In [47]:
array1 + array2
Out[47]:
In [48]:
d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
In [49]:
print(d)
In [50]:
d[:, 0]
Out[50]:
In [51]:
%matplotlib inline
import matplotlib.pyplot as plt
In [52]:
y = [1, 7, 3, 5, 12]
x = [1, 2, 3, 4, 5]
plt.plot(x, y, marker='o');
In [53]:
plt.grid()
In [54]:
plt.plot(x, y, marker='o');
plt.grid()
In [55]:
plt.scatter(x, y, marker='x');
In [56]:
%config InlineBackend.figure_format = 'pdf'
plt.bar(x, y);
In [57]:
anos = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
pib = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]
plt.plot(anos, pib, marker='o')
plt.title('PIB')
plt.xlabel('Ano')
plt.ylabel(u'Bilhões de R$')
plt.grid()
In [58]:
%config InlineBackend.figure_format = 'png'
In [59]:
anos = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
pib = [300.2, 543.3, 1075.9, 2862.5, 5979.6, 10289.7, 14958.3]
plt.plot(anos, pib, marker='o')
plt.title('PIB')
plt.xlabel('Ano')
plt.ylabel(u'Bilhões de R$')
plt.grid()
In [60]:
%matplotlib inline
import numpy as np
import pandas as pd
In [61]:
s1 = pd.Series([2, 7, 5, 1, 4])
print(s1)
In [62]:
lista = [2, 7, 5, 1, 4]
rotulos = ['a', 'b', 'c', 'd', 'e']
s2 = pd.Series(lista, index=rotulos)
print(s2)
In [63]:
print(s1[2])
print(s2['c'])
In [64]:
s1.plot(marker='o', grid=True);
In [65]:
matriz = np.array([[1, 2, 3], [4, 5, 6]])
nomes_linhas = ['L1', 'L2']
nomes_cols = ['C1', 'C2', 'C3']
df = pd.DataFrame(matriz, index=nomes_linhas, columns=nomes_cols)
df
Out[65]:
In [66]:
print(df)
In [67]:
print(df.to_latex())
In [68]:
df.plot(kind='bar', grid=True, title=u'Título');
In [69]:
# Teste de base
In [70]:
# Read the three arrays from .npy files in the working directory, in the same order
# as before: X, then X_transf, then y.
# NOTE(review): presumably features, transformed features and labels — confirm.
X, X_transf, y = map(np.load, ('X.npy', 'X_transf.npy', 'y.npy'))
In [71]:
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
In [72]:
# `from sklearn import ...` binds only the imported submodules, not the name `sklearn`,
# so the package itself must be imported before its version can be read.
import sklearn
sklearn.__version__
In [73]:
import sklearn
sklearn.__version__
Out[73]:
In [74]:
# Manual k-fold cross-validation of a logistic-regression model, one accuracy per fold.
num_folds = 10
seed = 7
model = LogisticRegression()
# random_state only takes effect together with shuffle=True; recent scikit-learn
# releases raise ValueError when it is set while shuffle is left at False.
kfold = model_selection.KFold(n_splits=num_folds, shuffle=True, random_state=seed)
accuracy = np.zeros(num_folds)
# enumerate() supplies the fold number, replacing the hand-maintained counter.
for i, (train_idx, test_idx) in enumerate(kfold.split(X)):
    print(len(train_idx), len(test_idx))
    # Train on the rows selected for this fold's training split.
    model.fit(X[train_idx], y[train_idx])
    # Predict the held-out rows with the model that was just fitted.
    y_pred = model.predict(X[test_idx])
    # Score the fold by comparing the true labels with the predictions.
    accuracy[i] = accuracy_score(y[test_idx], y_pred)
# Per-fold accuracies, then their mean and standard deviation.
print(accuracy)
print(np.average(accuracy), np.std(accuracy))
In [75]:
results = model_selection.cross_val_score(model, X, y, cv=kfold)
print(results)
print("Accuracy: {} {}".format(results.mean(), results.std()) )
In [76]:
results = model_selection.cross_val_score(model, X_transf, y, cv=kfold)
print(results)
print("Accuracy: {} {}".format(results.mean(), results.std()) )
In [78]:
from sklearn.ensemble import GradientBoostingClassifier
In [80]:
model3 = GradientBoostingClassifier(n_estimators=50, max_depth=3, learning_rate=0.05)
results = model_selection.cross_val_score(model3, X_transf, y, cv=kfold)
print(results)
print("Accuracy: {} {}".format(results.mean(), results.std()) )
In [81]:
from itertools import product

# Exhaustive grid search over gradient-boosting hyper-parameters; each combination is
# scored with cross_val_score on X_transf using the shared `kfold` splitter.
n_estimators_ = [20, 40, 60, 80, 100]
max_depth_ = [2, 3, 5]
learning_rate_ = [0.05, 0.1]
results = []
for ne, md, lr in product(n_estimators_, max_depth_, learning_rate_):
    print(ne, md, lr)
    model_ = GradientBoostingClassifier(n_estimators=ne, max_depth=md, learning_rate=lr)
    # Evaluate the model built for THIS combination. The original passed the fixed
    # `model3`, so every row of the grid reported the score of the same model.
    result = model_selection.cross_val_score(model_, X_transf, y, cv=kfold)
    # One record per combination: the parameters plus mean and std of the fold scores.
    result_ = {
        'n_estimators': ne,
        'max_depth': md,
        'learning_rate': lr,
        'accuracy': result.mean(),
        'std': result.std(),
    }
    results.append(result_)
In [82]:
df = pd.DataFrame(results, columns=['n_estimators', 'max_depth', 'learning_rate', 'accuracy', 'std'])
df.sort_values(by='accuracy', ascending=False)
Out[82]:
In [ ]: