In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
In [2]:
# Toy frame: column A holds 0..4, B the squares and C the cubes of those values.
df = pd.DataFrame({
    label: [n ** power for n in range(5)]
    for label, power in zip('ABC', (1, 2, 3))
})
In [3]:
# Show the freshly built frame (columns A, B, C) as plain text.
print(df)
In [5]:
# Pairwise Pearson correlation between the columns of `df`;
# print the resulting matrix followed by its Python type.
df_corr = df.corr(method='pearson')
print(df_corr, type(df_corr), sep='\n')
In [6]:
# Extend `df` in place with a string column (D) and a boolean column (E),
# then display the widened frame.
df['D'], df['E'] = list('abcde'), [True, False, True, True, False]
print(df)
In [7]:
# List the dtype of every column, including the string column D and the
# boolean column E that were added to `df`.
print(df.dtypes)
In [8]:
# `df` now contains the string column D, which cannot be correlated.
# pandas >= 2.0 no longer drops such columns silently (numeric_only defaults
# to False) and raises ValueError instead, so restrict the computation to the
# numeric and boolean columns explicitly.
df_corr = df.corr(numeric_only=True)
print(df_corr)
In [9]:
# Build an independent copy of `df` whose second column (position 1) is float.
# Writing NaN into an integer column relies on silent upcasting, which
# pandas 2.1+ deprecates (FutureWarning) and later versions reject, so the
# column is cast up front. `astype` returns a new frame, leaving `df` intact.
df_nan = df.astype({df.columns[1]: 'float64'})
# Blank out rows 2-4 of that column to create missing values.
df_nan.iloc[[2, 3, 4], 1] = np.nan
print(df_nan)
In [10]:
# Correlation on the frame with missing values. `df_nan` still carries the
# string column D, so numeric_only=True is required on pandas >= 2.0 to avoid
# a ValueError; NaN entries are excluded pairwise by `corr` itself.
df_nan_corr = df_nan.corr(numeric_only=True)
print(df_nan_corr)
In [11]:
# Draw the correlation matrix as a heatmap on a fixed [-1, 1] colour scale
# centred at 0, then write the figure that owns the returned Axes to disk.
ax = sns.heatmap(df_corr, vmin=-1, vmax=1, center=0)
ax.get_figure().savefig('data/dst/seaborn_heatmap_corr_example.png')
In [12]:
# Load the house-price training CSV, using its first column as the row index.
# NOTE(review): presumably the first column is a record id — confirm against the file.
df_house = pd.read_csv('data/src/house_prices_train.csv', index_col=0)
In [13]:
# Report the (rows, columns) size of the loaded table.
print(df_house.shape)
In [14]:
# Count how many columns share each dtype.
print(df_house.dtypes.value_counts())
In [15]:
# Correlation matrix over the numeric/boolean columns only. On pandas >= 2.0
# the default (numeric_only=False) raises ValueError as soon as the frame
# contains a non-numeric column, so the restriction is made explicit.
# NOTE(review): assumes the CSV includes text columns — harmless if it does not.
df_house_corr = df_house.corr(numeric_only=True)
In [16]:
# Report the dimensions of the correlation matrix.
print(df_house_corr.shape)
In [17]:
# Larger canvas for the house-price correlation matrix.
fig, ax = plt.subplots(figsize=(12, 9))
# Draw onto the Axes created above instead of relying on pyplot's implicit
# "current axes" (previously `ax` was created but never used).
sns.heatmap(df_house_corr, square=True, vmax=1, vmin=-1, center=0, ax=ax)
# Save through the figure object so the file always comes from this figure.
fig.savefig('data/dst/seaborn_heatmap_house_price.png')