In [207]:
#http://www.lotoideas.com/primitiva-resultados-historicos-de-todos-los-sorteos/
import numpy as np
import pandas as pd
%matplotlib inline
In [208]:
# Read the historical results file and give its ten columns short, fixed names.
draws = pd.read_csv('2013-2018.csv')
draws.columns = ['Date', 'N1', 'N2', 'N3', 'N4', 'N5', 'N6', 'Comp', 'R', 'Joker']

# Index the rows by the parsed date (day/month/year) and put them in
# chronological order; the original 'Date' text column is kept as well.
draw_dates = pd.to_datetime(draws['Date'], format='%d/%m/%Y')
data = draws.set_index(draw_dates).sort_index()
data.head()
Out[208]:
In [209]:
# One line plot per number column (N1..N6) against the date index, on a 2x3 grid.
number_columns = ['N1', 'N2', 'N3', 'N4', 'N5', 'N6']
ax = data[number_columns].plot(subplots=True, layout=(2, 3))

# Summary statistics, displayed as the cell's output.
data.describe()
Out[209]:
In [210]:
# Histogram of each number column (N1..N6), overlaid on a single set of axes.
# The six separate, fully opaque `.hist()` calls drew on top of one another with
# no legend, so the earlier columns were hidden and the bars could not be told
# apart. One `plot.hist` call gives every column the same bin edges, a
# semi-transparent fill and a legend entry.
hist_ax = data.loc[:, 'N1':'N6'].plot.hist(
    bins=10,
    alpha=0.5,
    title='Distribution of each drawn number (N1-N6)',
)
hist_ax.set_xlabel('Number drawn');
Out[210]:
Judging from the previous graphs, the drawn numbers appear to be randomly distributed and stable over time.
In [211]:
# Gap between each pair of adjacent number columns:
# D12 = N2 - N1, D23 = N3 - N2, ..., D56 = N6 - N5.
for lo, hi in zip(range(1, 6), range(2, 7)):
    data[f'D{lo}{hi}'] = data[f'N{hi}'] - data[f'N{lo}']
In [243]:
# Count how many of the six drawn numbers (N1..N6) in each row are even / odd.
# `n % 2` is 1 for an odd number and 0 for an even one, so summing it across the
# row gives the number of ODD values. The previous code stored that sum under
# 'evens', which swapped the two columns.
numbers = data.loc[:, 'N1':'N6']
odd_count = (numbers % 2).sum(axis=1)
# 'evens' is assigned first so the column order of `data` stays as it was.
data['evens'] = numbers.shape[1] - odd_count
data['odds'] = odd_count
In [291]:
# Store each draw date as an integer: parse the 'Date' text (day/month/year)
# and keep the nanosecond epoch value exposed by `Timestamp.value`.
parsed_dates = pd.to_datetime(data['Date'], format='%d/%m/%Y')
data['Timestamp'] = parsed_dates.map(lambda ts: ts.value)
data.head()
Out[291]:
In [198]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
# Features: the six drawn numbers (columns 1..6 = N1..N6).
X = data.iloc[:, 1:7].values
# Targets: columns 7..13 by position.
# NOTE(review): with the cells above run top to bottom, this slice selects
# Comp, R, Joker, D12, D23, D34 and D45 and stops just before D56 -- confirm
# that this is the intended target set.
y = data.iloc[:, 7:14].values

# Hold out 20% of the rows for testing. A fixed `random_state` makes the
# shuffle reproducible, so a "Restart & Run All" yields the same split (and
# therefore the same downstream scores) every time.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)