In [1]:
%matplotlib inline
import pandas as pd
In [2]:
# Load the training listings straight from the course repository (zipped CSV).
data = pd.read_csv(filepath_or_buffer='https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTrain_carListings.zip')
In [3]:
# Preview the first five rows of the training frame.
data.head(n=5)
Out[3]:
In [4]:
# (rows, columns) of the training frame.
data.shape
Out[4]:
In [5]:
# Summary statistics of the Price column.
data['Price'].describe()
Out[5]:
In [6]:
# Price against Year, one point per row.
data.plot.scatter(x='Year', y='Price')
Out[6]:
In [7]:
# Price against Mileage, one point per row.
data.plot.scatter(x='Mileage', y='Price')
Out[7]:
In [8]:
# List the column labels available in the training frame.
data.columns
Out[8]:
Develop a machine learning model that predicts the price of a car, using as inputs ['Year', 'Mileage', 'State', 'Make', 'Model'].
Submit the predictions for the test set to Kaggle: https://www.kaggle.com/c/miia4200-20191-p1-usedcarpriceprediction
In [2]:
# Load the test listings; the first CSV column becomes the row index.
data_test = pd.read_csv(filepath_or_buffer='https://github.com/albahnsen/PracticalMachineLearningClass/raw/master/datasets/dataTest_carListings.zip', index_col=0)
In [3]:
# Preview the first five rows of the test frame.
data_test.head(n=5)
Out[3]:
In [4]:
# (rows, columns) of the test frame.
data_test.shape
Out[4]:
In [6]:
import numpy as np
In [7]:
# Placeholder submission: seeded uniform-random prices on [5000, 80000),
# one per test row, indexed like the test frame. No model is involved here.
np.random.seed(42)
y_pred = pd.DataFrame(
    {'Price': 5000 + 75000 * np.random.rand(len(data_test))},
    index=data_test.index,
)
In [8]:
# Write the submission file; the index is emitted under the header 'ID'.
y_pred.to_csv(path_or_buf='test_submission.csv', index_label='ID')
In [9]:
# Preview the first five predicted prices.
y_pred.head(n=5)
Out[9]: