In [1]:
#first start python, then import the packages you want to use.
#You can also put all of this in a .py file and run that by typing
# python filename.py
#
# OR you can import the file itself while you are inside ipython and it will execute
#
# Matplotlib has an excellent plot gallery which you can browse and then see the
# code that made the plot you'd like:  http://matplotlib.org/gallery.html

%matplotlib inline 
from astropy.io import ascii
from astropy import table
import matplotlib.pyplot as plt #this syntax means you can call the function with plt
import numpy as np #same here, you don't have to say numpy.mean you can say np.mean

Read in your CSV file. I've created a file of fake data which looks like this:

id,first_name,last_name,email,country,ip_address
1,Edward,Little,elittle0@123-reg.co.uk,Philippines,150.82.68.61
2,Karen,Medina,kmedina1@toplist.cz,Belarus,134.180.95.112
3,Sarah,Warren,swarren2@ocn.ne.jp,Brazil,238.31.34.51
4,Nancy,Porter,nporter3@xrea.com,China,141.54.91.237
5,Cynthia,Ryan,cryan4@merriam-webster.com,China,240.253.168.2
6,Christine,Reid,creid5@fotki.com,Philippines,235.144.30.199


In [2]:
# Load the mock CSV into a table. No format is given, so ascii.read guesses it;
# the format can also be specified explicitly.
mydata = ascii.read('MOCK_DATA.csv')

In [3]:
mydata  # a bare table name as the last line renders it as an HTML table in the notebook


Out[3]:
<Table masked=False length=100>
idfirst_namelast_nameemailcountryip_address
int64string72string80string264string176string120
1EdwardLittleelittle0@123-reg.co.ukPhilippines150.82.68.61
2KarenMedinakmedina1@toplist.czBelarus134.180.95.112
3SarahWarrenswarren2@ocn.ne.jpBrazil238.31.34.51
4NancyPorternporter3@xrea.comChina141.54.91.237
5CynthiaRyancryan4@merriam-webster.comChina240.253.168.2
6ChristineReidcreid5@fotki.comPhilippines235.144.30.199
7MelissaRiveramrivera6@utexas.eduChina160.228.167.157
8RyanCarterrcarter7@salon.comHonduras67.42.75.243
9JaneMillerjmiller8@google.comChina106.226.168.245
10ThomasWeavertweaver9@eventbrite.comChina134.222.9.178
..................
91LillianShawlshaw2i@creativecommons.orgJapan92.67.199.158
92SamuelWatsonswatson2j@mapquest.comRussia248.6.159.147
93JonathanReidjreid2k@biblegateway.comSlovenia187.82.21.220
94SteveArmstrongsarmstrong2l@sitemeter.comChina245.248.7.239
95RogerPerryrperry2m@marketwatch.comChina171.194.230.91
96AnnBoydaboyd2n@ustream.tvBrazil218.22.225.99
97MichaelLewismlewis2o@nsw.gov.auIndonesia228.144.235.44
98LillianHilllhill2p@oakley.comPapua New Guinea108.56.214.12
99LillianJohnsonljohnson2q@amazon.deChina249.10.42.51
100LauraJamesljames2r@tiny.ccIndonesia117.207.220.225

In [4]:
# Add a column of random numbers to the table so there is something to plot.
# The array is sized from the table itself rather than a hard-coded 100, so the
# cell keeps working if MOCK_DATA.csv has a different number of rows.
# No seed is set here, so the values change on every run.
mydata['random'] = np.random.rand(len(mydata))

In [5]:
mydata  # display the table again to confirm the new 'random' column is present


Out[5]:
<Table masked=False length=100>
idfirst_namelast_nameemailcountryip_addressrandom
int64string72string80string264string176string120float64
1EdwardLittleelittle0@123-reg.co.ukPhilippines150.82.68.610.494520597337
2KarenMedinakmedina1@toplist.czBelarus134.180.95.1120.289845931316
3SarahWarrenswarren2@ocn.ne.jpBrazil238.31.34.510.0414157138046
4NancyPorternporter3@xrea.comChina141.54.91.2370.406380758172
5CynthiaRyancryan4@merriam-webster.comChina240.253.168.20.619920719204
6ChristineReidcreid5@fotki.comPhilippines235.144.30.1990.503942940748
7MelissaRiveramrivera6@utexas.eduChina160.228.167.1570.301516515341
8RyanCarterrcarter7@salon.comHonduras67.42.75.2430.704877743324
9JaneMillerjmiller8@google.comChina106.226.168.2450.820831813621
10ThomasWeavertweaver9@eventbrite.comChina134.222.9.1780.47967175912
.....................
91LillianShawlshaw2i@creativecommons.orgJapan92.67.199.1580.317876244377
92SamuelWatsonswatson2j@mapquest.comRussia248.6.159.1470.636703417648
93JonathanReidjreid2k@biblegateway.comSlovenia187.82.21.2200.559439182586
94SteveArmstrongsarmstrong2l@sitemeter.comChina245.248.7.2390.71415277566
95RogerPerryrperry2m@marketwatch.comChina171.194.230.910.0060359445533
96AnnBoydaboyd2n@ustream.tvBrazil218.22.225.990.652625069346
97MichaelLewismlewis2o@nsw.gov.auIndonesia228.144.235.440.948495930874
98LillianHilllhill2p@oakley.comPapua New Guinea108.56.214.120.728972017582
99LillianJohnsonljohnson2q@amazon.deChina249.10.42.510.688664065408
100LauraJamesljames2r@tiny.ccIndonesia117.207.220.2250.866054250663

In [6]:
# Line plot of the random column against the person ID.
id_col = mydata['id']
random_col = mydata['random']
plt.plot(id_col, random_col)
plt.title("Person ID versus Random Number")
plt.xlabel("ID")
plt.ylabel(random_col.name)  # the column's own name doubles as the axis label


Out[6]:
<matplotlib.text.Text at 0x109618850>

In [7]:
# Same data, drawn as unconnected green plus signs via the "g+" format string.
plt.plot(
    mydata['id'],
    mydata['random'],
    "g+",
)


Out[7]:
[<matplotlib.lines.Line2D at 0x1096a76d0>]

In [8]:
# Keep only the rows whose country is China.
from_china = mydata['country'] == "China"
subtable = mydata[from_china]

In [9]:
subtable  # display the filtered table (rows where country == "China")


Out[9]:
<Table masked=False length=23>
idfirst_namelast_nameemailcountryip_addressrandom
int64string72string80string264string176string120float64
4NancyPorternporter3@xrea.comChina141.54.91.2370.406380758172
5CynthiaRyancryan4@merriam-webster.comChina240.253.168.20.619920719204
7MelissaRiveramrivera6@utexas.eduChina160.228.167.1570.301516515341
9JaneMillerjmiller8@google.comChina106.226.168.2450.820831813621
10ThomasWeavertweaver9@eventbrite.comChina134.222.9.1780.47967175912
19JimmyFisherjfisheri@sbwire.comChina34.55.70.580.00877312890271
27AliceAllenaallenq@goodreads.comChina231.204.181.970.708037589884
30RyanOrtizrortizt@wisc.eduChina7.30.45.780.896479463919
32DonnaReiddreidv@liveinternet.ruChina175.50.24.2180.538032562421
42GregoryWarrengwarren15@studiopress.comChina35.144.47.1340.894386785817
.....................
54LouiseGonzaleslgonzales1h@upenn.eduChina50.79.44.2110.591076703526
58WandaWatsonwwatson1l@shareasale.comChina21.232.139.1740.16295963433
62RichardWelchrwelch1p@illinois.eduChina90.209.128.830.0782631964479
68ChristinaPortercporter1v@jigsy.comChina134.190.53.260.971188516319
73AaronGutierrezagutierrez20@europa.euChina164.154.84.2310.837672040531
80MargaretStewartmstewart27@fema.govChina119.29.199.460.151119441736
90TeresaPattersontpatterson2h@biglobe.ne.jpChina6.202.225.2480.408303806967
94SteveArmstrongsarmstrong2l@sitemeter.comChina245.248.7.2390.71415277566
95RogerPerryrperry2m@marketwatch.comChina171.194.230.910.0060359445533
99LillianJohnsonljohnson2q@amazon.deChina249.10.42.510.688664065408

In [10]:
# First step towards a bar graph of people per country: collect each distinct
# country name once (np.unique hands the names back sorted).
countries = np.unique(mydata['country'])

In [11]:
countries  # display the unique country names


Out[11]:
<Column name='country' dtype='string176' length=41>
Albania
Argentina
Belarus
Bosnia and Herzegovina
Brazil
Cambodia
Cayman Islands
China
Cyprus
Czech Republic
Dominican Republic
Ecuador
...
Palestinian Territory
Papua New Guinea
Peru
Philippines
Poland
Portugal
Russia
Slovenia
Thailand
Togo
United States
Vietnam

In [12]:
# Tally how many rows belong to each country, in the same order as `countries`.
count = [len(mydata[mydata['country'] == country]) for country in countries]

In [13]:
# Horizontal bar chart: one bar per country, labelled with the country name.
n_countries = len(countries)
y_pos = np.arange(n_countries)
plt.barh(y_pos, count, align='center', alpha=0.4)
plt.yticks(y_pos, countries)
plt.xlabel('Number of People in each country')
plt.title('Country Representation')


Out[13]:
<matplotlib.text.Text at 0x1096d32d0>

In [14]:
# Too many bars to read comfortably, so keep only the first 10 entries of each sequence.
small_count, small_country = count[:10], countries[:10]

In [15]:
# Bar chart restricted to the first 10 countries.
y_pos = np.arange(len(small_country))
plt.barh(y_pos, small_count, align='center', alpha=0.4)
# Label the ticks with the 10-element subset: the full `countries` array has
# more entries than there are tick positions here.
plt.yticks(y_pos, small_country)
plt.xlabel('Number of People in each country')
plt.title('Country Representation')


Out[15]:
<matplotlib.text.Text at 0x109a72d90>

In [15]: