In [1]:
# First start Python, then import the packages you want to use.
# You can also put all of this in a .py file and run it by typing
#   python filename.py
#
# Or you can import the file itself from inside IPython, and it will execute.
#
# Matplotlib has an excellent plot gallery where you can browse the examples and see
# the code that made the plot you'd like: http://matplotlib.org/gallery.html
%matplotlib inline
from astropy.io import ascii
from astropy import table
import matplotlib.pyplot as plt #this syntax means you can call the function with plt
import numpy as np #same here, you don't have to say numpy.mean you can say np.mean
id,first_name,last_name,email,country,ip_address
1,Edward,Little,elittle0@123-reg.co.uk,Philippines,150.82.68.61
2,Karen,Medina,kmedina1@toplist.cz,Belarus,134.180.95.112
3,Sarah,Warren,swarren2@ocn.ne.jp,Brazil,238.31.34.51
4,Nancy,Porter,nporter3@xrea.com,China,141.54.91.237
5,Cynthia,Ryan,cryan4@merriam-webster.com,China,240.253.168.2
6,Christine,Reid,creid5@fotki.com,Philippines,235.144.30.199
In [2]:
# Load the CSV into an astropy table. With no format given, ascii.read guesses the
# layout from the file contents; you can also specify the formatting explicitly.
csv_path = 'MOCK_DATA.csv'
mydata = ascii.read(csv_path)
In [3]:
mydata #a bare table name as the last line of a cell is rendered as a nice HTML table in the notebook
Out[3]:
In [4]:
# Now let's add a column of random numbers to the table so we have something to plot.
# The array is sized from the table itself (not a hard-coded 100) because a new
# column must have exactly as many entries as the table has rows.
np.random.seed(42)  # fixed seed so re-running the notebook reproduces the same numbers
mydata['random'] = np.random.rand(len(mydata))  # uniform random floats in [0, 1)
In [5]:
mydata #display the table again to check that the new 'random' column is in there
Out[5]:
In [6]:
# Line plot of the random column against the person ID.
id_column = mydata['id']
random_column = mydata['random']
plt.plot(id_column, random_column)
plt.title("Person ID versus Random Number")
plt.xlabel("ID")
# Reuse the column's own name as the axis label instead of retyping it.
plt.ylabel(random_column.name)
Out[6]:
In [7]:
# Same data drawn as unconnected green plus signs: the "g+" format string asks
# for green ('g') plus-sign markers ('+') with no line joining the points.
x_ids, y_random = mydata['id'], mydata['random']
plt.plot(x_ids, y_random, "g+")
Out[7]:
In [8]:
# Let's look only at the people from China: build a boolean mask over the
# 'country' column, then use it to select the matching rows.
from_china = mydata['country'] == "China"
subtable = mydata[from_china]
In [9]:
subtable #display the filtered table (only the rows whose country is "China")
Out[9]:
In [10]:
# Next: a bar graph of the number of people from each country.
# Start by collecting every distinct country name; np.unique returns them sorted.
country_column = mydata['country']
countries = np.unique(country_column)
In [11]:
countries #display the unique country names
Out[11]:
In [12]:
# Now we count the members of each country and save the counts as a list,
# in the same order as `countries`.
# mydata[mask] selects the rows matching one country; len() of that sub-table
# is the number of people from it.
count = [len(mydata[mydata['country'] == country]) for country in countries]
In [13]:
# Horizontal bar chart: one bar per country, bar length = number of people.
y_pos = np.arange(len(countries))  # one bar position per country
bar_style = dict(align='center', alpha=0.4)  # bars centred on their ticks, semi-transparent
plt.barh(y_pos, count, **bar_style)
plt.yticks(y_pos, countries)  # label each bar with its country name
plt.xlabel('Number of People in each country')
plt.title('Country Representation')
Out[13]:
In [14]:
# That's a lot of countries, so keep only the first 10 for plotting.
# Both sequences are cut at the same bound so each count stays paired with its country.
small_count, small_country = count[:10], countries[:10]
In [15]:
# Horizontal bar chart restricted to the first 10 countries.
y_pos = np.arange(len(small_country))  # one bar position per plotted country
plt.barh(y_pos, small_count, align='center', alpha=0.4)
# Label the ticks with the truncated list so there is exactly one label per bar;
# passing the full `countries` list would give more labels than tick positions.
plt.yticks(y_pos, small_country)
plt.xlabel('Number of People in each country')
plt.title('Country Representation')
Out[15]:
In [15]: