Read these carefully
In [1]:
# Run the following to import necessary packages and import dataset. Do not use any additional plotting libraries.
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
# Relative path to the ice cream CSV (resolved against the current working directory).
datafile = "dataset/icecream.csv"
# Load the CSV into a DataFrame; the cells below read it through the name `df`.
df = pd.read_csv(datafile)
# Bare last expression: renders the DataFrame as this cell's output.
df
Out[1]:
In [19]:
# Here are the correlation coefficients between pairs of columns.
# DataFrame.corr() uses the Pearson method unless told otherwise, and returns a
# square matrix labelled by column name on both axes.
corr = df.corr()
# Bare last expression: renders the correlation matrix as this cell's output.
corr
Out[19]:
In [60]:
# Cut-off above which an absolute correlation coefficient counts as "strong".
STRONG_CORR_THRESHOLD = 0.9

# Absolute values, so strong negative correlations are kept as well.
abs_corr = df.corr().abs()
# Take the labels from the matrix computed here rather than from `corr`, so this
# cell does not depend on state left behind by a different cell.
indices = abs_corr.index

# Collect each unordered pair exactly once (i < j): starting j at i + 1 skips the
# trivial self-correlations on the diagonal and the mirrored duplicates.
# abs_corr.iloc[j, i] is row j of column i, i.e. the same value the column-wise
# walk over abs_corr[indices[i]] would visit at position j.
corr_pairs = [
    (indices[i], indices[j])
    for i in range(len(indices))
    for j in range(i + 1, len(indices))
    if abs_corr.iloc[j, i] > STRONG_CORR_THRESHOLD
]
In [61]:
# Show the column pairs whose absolute correlation coefficient exceeded 0.9.
corr_pairs
Out[61]:
Identify strong (i.e., correlation coefficient > 0.9) and meaningful correlations among pairs of columns in this dataset.
Append each pair of correlated columns, in the form [column_x, column_y],
to the variable below.
In [7]:
# Answer list: one [column_x, column_y] entry per correlation judged both
# strong and meaningful.
correlations = [
    ['ice_cream_sales', 'temperature'],
]
# do not touch
correlations.sort()
print(correlations)
If this clue changes your answer, try again below. Otherwise, if you are confident in your answer above, leave the following untouched.
In [ ]:
# Revised answer after the clue. To add a pair, place a line of this form
# AFTER the list is created below (and before the "do not touch" lines):
#   correlations_clue.append(['column_x', 'column_y'])
correlations_clue = []
# do not touch
correlations_clue.sort()
print(correlations_clue)