In [1]:
# First, we'll "import" the software packages needed.
import pandas as pd
import numpy as np
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Keep a copy of the current plot settings so they can be put back later in the notebook.
inline_rc = dict(mpl.rcParams)
# Starting a line with a hashtag tells the program not to read the line.
# That way we can write "comments" to humans trying to figure out what the code does.
# Blank lines don't do anything either, but they can make the code easier to read.
Now let's choose some data to plot. In this example we'll pull data from CERN's CMS detector and make a histogram of invariant mass. You can find more at CERN OpenData. This next cell will take a little while to run since it's grabbing a pretty big data set. This one contains 100,000 collision events. The cell label will look like "In [*]" while it's still thinking and "In [2]" when it's finished.
In [2]:
# Download the dimuon events (CERN OpenData record 303) into a pandas table called "data".
data = pd.read_csv(filepath_or_buffer='http://opendata.cern.ch/record/303/files/dimuon.csv')
# To analyze dielectron data instead, replace the URL above with this one:
# http://opendata.cern.ch/record/304/files/dielectron.csv
We can view the first few rows of the file we just imported.
In [3]:
# Show only the top of the table: .head(n) returns its first n rows.
data.head(n=3)
Out[3]:
In [4]:
# Histogram of the invariant mass column, with a logarithmic y-axis.
# A notebook cell only prints the value of its LAST line, so any stray text output
# comes from plt.ylabel(...), not from the histogram's frequency table.
# The ; at the end of the last line "suppresses" that text output; remove it to see the difference.
plt.hist(data.M, bins=120, range=[0,120], log=True)
plt.title("CMS Dimuon Mass Plot")
plt.xlabel("mass (GeV)")
plt.ylabel("number of events");
Out[4]:
Try editing the number of bins or bin range in the previous code cell. To re-execute the code, click the play icon in the toolbar or press SHIFT + ENTER.
In [ ]:
In [5]:
# Keep only the events whose two particles have opposite charges (Q1 differs from Q2).
# Swap != for == to keep the same-charge events instead.
data2 = data.loc[data["Q1"] != data["Q2"]]
In [6]:
# Keep only the events whose mass M lies strictly between the two limits.
# This chooses 50 to 80 GeV; edit the two numbers to pick another window.
data3 = data.loc[(data["M"] > 50) & (data["M"] < 80)]
In [7]:
# make a scatterplot of two columns
# plt.scatter(x_column, y_column, s=point_size, other parameters)
plt.scatter(data.eta1, data.phi1, s=.001)
# Label each axis with the column it shows so the figure can be read on its own.
plt.xlabel("eta1")
# The trailing ; keeps the cell from printing a text description of the last call.
plt.ylabel("phi1");
Out[7]:
In [8]:
# Switch matplotlib to its xkcd.com-style "hand-drawn" look.
plt.xkcd()
# Split the events by charge; giving plt.hist a list of samples lets it stack them.
d1 = data[data["Q1"] == data["Q2"]]  # both particles have the same charge
d2 = data[data["Q1"] != data["Q2"]]  # the two particles have opposite charges
fig = plt.figure(figsize=(10, 5))
plt.hist(
    [d1["M"], d2["M"]],
    bins=20,
    range=[2, 5],
    log=True,
    stacked=True,
    label=["events with same Q", "events with opp Q"],
)
plt.title("Cutting on net charge")
plt.xlabel("mass (GeV)")
plt.ylabel("log number of events")
plt.legend()
Out[8]:
In [9]:
# Put back the plot settings saved in inline_rc so later plots look normal again.
plt.rcParams.update(inline_rc)