In [ ]:
# import software packages
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# keep a copy of the current matplotlib settings; the commented-out
# mpl.rcParams.update(inline_rc) line in a later cell uses it to put them back
inline_rc = dict(mpl.rcParams)
In [ ]:
# enter column labels and raw data (with same # of values)
# NOTE: pd.DataFrame.from_items() was removed in pandas 1.0, so the table is
# built from a dict instead; columns appear in the order they are written here
table1 = pd.DataFrame({
    'column1': [0, 1, 2, 3],
    'column2': [0, 2, 4, 6],
})
# display data table
table1
In [ ]:
# Uncomment the next line to make your graphs look like xkcd.com
#plt.xkcd()
# to make normal-looking plots again execute:
#mpl.rcParams.update(inline_rc)
In [ ]:
# pull the two columns to plot out of the data table:
# variable = data['column label']
x, y = table1['column1'], table1['column2']

# scatterplot of the raw data: plt.scatter(x values, y values)
plt.scatter(x, y)
# axis labels and title (left as "?" here)
plt.ylabel("?")
plt.xlabel("?")
plt.title("?")
plt.autoscale(tight=True)

# fit a polynomial of order 1 (a straight line) to the data:
# np.polyfit(x values, y values, polynomial order)
trend1 = np.polyfit(x, y, 1)

# evaluate the fitted line at every x value and draw it over the scatterplot:
# plt.plot(x values, y values, other parameters)
plt.plot(x, np.polyval(trend1, x), label='trendline')
plt.legend(loc='upper left')
In [ ]:
# display the trendline's coefficients (slope, y-int)
# trend1 was returned by np.polyfit(x, y, 1) in the plotting cell, which lists
# the coefficients with the highest power first
trend1
In [ ]:
# create a new empty column
# (every row of 'column3' is set to the empty string '')
table1['column3'] = ''
# display the data table with the new column
table1
Here's an example of calculating the difference between consecutive values in column 1 (the code below applies np.diff() to x, which holds column1):
In [ ]:
# np.diff() calculates the difference between a value and the one after it,
# so z has one fewer value than x
z = np.diff(x)
# fill column 3 with values from the formula (z) above:
# NOTE: pd.DataFrame.from_items() was removed in pandas 1.0, so z is wrapped
# in a Series instead. The Series is matched to the table by row label, so
# with the default 0, 1, 2, ... row labels the last row (which has no
# "next" value to subtract) is left as NaN.
table1['column3'] = pd.Series(z)
# display the data table
table1
In [ ]:
# NaN and Inf values cause problems with math and plotting.
# Make a new table using only selected rows and columns
# .loc[row labels, column labels] selects by label, and a label slice such
# as 0:2 includes both of its end points
table2 = table1.loc[0:2,['column1', 'column2', 'column3']] # this keeps rows 0 through 2
# display the new table
table2
In [ ]:
# set new variables to plot (both are columns taken from table2)
x2, y2 = table2['column1'], table2['column3']
Now you can copy the code above to plot your new data table.
In [ ]:
# code for plotting table2 can go here