In [1]:
import pandas as pd
import numpy as np
In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [3]:
# Read the advertising dataset from a CSV file (path is relative to the working directory).
ad_data = pd.read_csv(filepath_or_buffer="advertising.csv")
In [4]:
# Preview the first five rows of the loaded frame.
ad_data.head(n=5)
Out[4]:
In [5]:
# Summary statistics for the frame's columns (pandas defaults).
ad_data.describe()
Out[5]:
In [6]:
# Histogram of the "Age" column, split into 30 bins.
ad_data.loc[:, "Age"].hist(bins=30)
Out[6]:
In [7]:
# Joint distribution of "Age" against "Area Income".
# x/y are passed by keyword together with `data`: seaborn 0.11 deprecated
# positional x/y and 0.12+ rejects them with a TypeError. The keyword form
# works on both old and new releases and yields the same axis labels.
sns.jointplot(x="Age", y="Area Income", data=ad_data)
Out[7]:
In [8]:
# KDE joint distribution of "Age" against "Daily Time Spent on Site".
# x/y are passed by keyword together with `data`: seaborn 0.11 deprecated
# positional x/y and 0.12+ rejects them with a TypeError. The keyword form
# works on both old and new releases and yields the same axis labels.
sns.jointplot(x="Age", y="Daily Time Spent on Site", data=ad_data, kind="kde")
Out[8]:
In [9]:
# Joint distribution of "Daily Internet Usage" against "Daily Time Spent on Site".
# x/y are passed by keyword together with `data`: seaborn 0.11 deprecated
# positional x/y and 0.12+ rejects them with a TypeError. The keyword form
# works on both old and new releases and yields the same axis labels.
sns.jointplot(x="Daily Internet Usage", y="Daily Time Spent on Site", data=ad_data)
Out[9]:
In [10]:
# Pairwise relationship grid over the frame, coloured by the "Clicked on Ad" column.
sns.pairplot(data=ad_data, hue="Clicked on Ad")
Out[10]:
In [13]:
from sklearn.model_selection import train_test_split
In [22]:
# Build the feature matrix: drop the four columns listed below, then drop
# the target column so that only the remaining predictors are left in X.
preX = ad_data.drop(columns=["Ad Topic Line", "City", "Country", "Timestamp"])
X = preX.drop(columns="Clicked on Ad")

# Target vector: the "Clicked on Ad" column of the original frame.
y = ad_data.loc[:, "Clicked on Ad"]
In [23]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)
In [24]:
from sklearn.linear_model import LogisticRegression
In [25]:
# Logistic-regression classifier.
# max_iter is raised above scikit-learn's default of 100: the features are
# fed in unscaled, and the default iteration budget can then be exhausted
# before the solver converges (reported as a ConvergenceWarning). A larger
# budget lets the optimiser finish; scaling the features is the alternative
# remedy. NOTE(review): confirm against the scikit-learn version in use.
logreg = LogisticRegression(max_iter=1000)
In [26]:
# Fit the classifier on the training portion of the split.
logreg.fit(X=X_train, y=y_train)
Out[26]:
In [27]:
# Predict class labels for the held-out rows.
predictions = logreg.predict(X=X_test)
In [29]:
from sklearn.metrics import classification_report
In [30]:
# Print the classification report for the held-out predictions.
# print() is kept so the multi-line report string renders with its line breaks.
print(classification_report(y_true=y_test, y_pred=predictions))
In [ ]: