Analysing tabular data

We are going to use NumPy, a Python library for working with arrays of numerical data.


In [2]:
import numpy

In [3]:
# Parse the comma-separated file and show the resulting array as the cell
# output; the result is not stored in a variable here.
numpy.loadtxt('data/weather-01.csv', delimiter=',')


Out[3]:
array([[ 0.,  0.,  1., ...,  3.,  0.,  0.],
       [ 0.,  1.,  2., ...,  1.,  0.,  1.],
       [ 0.,  1.,  1., ...,  2.,  1.,  1.],
       ..., 
       [ 0.,  1.,  1., ...,  1.,  1.,  1.],
       [ 0.,  0.,  0., ...,  0.,  2.,  0.],
       [ 0.,  0.,  1., ...,  1.,  1.,  0.]])

Variables


In [4]:
# Bind the name weight_kg to the integer 55.
weight_kg = 55

In [5]:
# Show the current value of weight_kg. The parenthesised single-argument form
# prints the same text on Python 2 and is also valid on Python 3.
print(weight_kg)


55

In [6]:
# Convert kilograms to pounds (factor 2.2) and print a readable message.
# The message is built as ONE string: on Python 2, print followed by a
# parenthesised, comma-separated list prints a tuple rather than the text.
print('Weight in pounds: ' + str(weight_kg * 2.2))


('Weight in pounds: ', 121.00000000000001)

In [7]:
# Rebind weight_kg to a new value; the name now refers to the float 57.5.
weight_kg = 57.5

In [8]:
# Recompute the weight in pounds from the updated weight_kg and print it.
# The message is built as ONE string: on Python 2, print followed by a
# parenthesised, comma-separated list prints a tuple rather than the text.
print('New weight: ' + str(weight_kg * 2.2))


('New weight: ', 126.50000000000001)

In [9]:
# IPython magic: list the variables currently defined in the interactive
# namespace, with their types and a short summary of their contents.
%whos


Variable    Type      Data/Info
-------------------------------
numpy       module    <module 'numpy' from '/Us<...>ages/numpy/__init__.pyc'>
weight_kg   float     57.5

In [10]:
# Read the comma-separated file into an array and keep it under the name
# `data` so that later cells can work with it.
data = numpy.loadtxt('data/weather-01.csv', delimiter=',')

In [11]:
# Print the array (numpy abbreviates large arrays with "..."). The
# single-argument print(...) form works on both Python 2 and Python 3.
print(data)


[[ 0.  0.  1. ...,  3.  0.  0.]
 [ 0.  1.  2. ...,  1.  0.  1.]
 [ 0.  1.  1. ...,  2.  1.  1.]
 ..., 
 [ 0.  1.  1. ...,  1.  1.  1.]
 [ 0.  0.  0. ...,  0.  2.  0.]
 [ 0.  0.  1. ...,  1.  1.  0.]]

In [12]:
# Show what kind of object `data` refers to. The single-argument print(...)
# form works on both Python 2 and Python 3.
print(type(data))


<type 'numpy.ndarray'>

In [13]:
# Inspect the array: first the data type of its elements, then its shape
# (number of rows, number of columns).

print(data.dtype)
print(data.shape)


float64
(60, 40)

In [14]:
# Get a single number out of the array with [row, column]; indices start at 0.
# The label and the value are joined into one string (separated by a space,
# as the print statement did) so the output is the same on Python 2 and 3.

print('%s %s' % ('First value in data: ', data[0, 0]))


First value in data:  0.0

In [15]:
# Pick one element from the interior of the array (row 10, column 10).
# The label and the value are joined into one string (separated by a space,
# as the print statement did) so the output is the same on Python 2 and 3.
print('%s %s' % ('A middle data: ', data[10, 10]))


A middle data:  5.0

In [16]:
# Let's get the first 10 columns for the first 4 rows.
# A slice start:stop includes `start` but excludes `stop`, so 0:4 selects
# rows 0-3 and 0:10 selects columns 0-9.

print(data[0:4, 0:10])


[[ 0.  0.  1.  3.  1.  2.  4.  7.  8.  3.]
 [ 0.  1.  2.  1.  2.  1.  3.  2.  2.  6.]
 [ 0.  1.  1.  3.  3.  2.  6.  2.  5.  9.]
 [ 0.  0.  2.  0.  4.  2.  2.  1.  6.  7.]]

In [17]:
# Omitted slice bounds default to the start / end of the axis:
# the first three rows, and every column from index 36 onwards.
print(data[:3, 36:])


[[ 2.  3.  0.  0.]
 [ 1.  1.  0.  1.]
 [ 2.  2.  1.  1.]]

In [18]:
# Cut out rows 3-9 and columns 5-11 of the array, then build a second array
# in which every element of that piece is multiplied by two.
smallchunk = data[3:10, 5:12]
doublesmallchunk = 2 * smallchunk

In [19]:
# Show the doubled sub-array. The single-argument print(...) form works on
# both Python 2 and Python 3.
print(doublesmallchunk)


[[  4.   4.   2.  12.  14.  20.  14.]
 [  2.   6.  10.   4.   8.   8.  14.]
 [  8.   4.   2.  12.   8.  14.  12.]
 [  4.   4.  10.  10.  16.  12.  10.]
 [  2.   4.   6.  10.   6.  14.  16.]
 [ 10.  12.  10.  10.  16.   4.   8.]
 [  6.  10.   6.  10.  16.  12.  16.]]

In [20]:
# Mean over every element of the array.
print(data.mean())


6.14875

In [21]:
# Largest value anywhere in the array.
print(data.max())


20.0

In [22]:
# Smallest value anywhere in the array.
print(data.min())


0.0

In [23]:
# The first station's readings are the first row of the array (row index 0,
# all columns).

station_0 = data[0]

In [24]:
# Largest reading within station_0. The single-argument print(...) form works
# on both Python 2 and Python 3.
print(numpy.max(station_0))


18.0

In [25]:
# Show every reading in station_0. The single-argument print(...) form works
# on both Python 2 and Python 3.
print(station_0)


[  0.   0.   1.   3.   1.   2.   4.   7.   8.   3.   3.   3.  10.   5.   7.
   4.   7.   7.  12.  18.   6.  13.  11.  11.   7.   7.   4.   6.   8.   8.
   4.   4.   5.   7.   3.   4.   2.   3.   0.   0.]

In [26]:
# We don't need to create 'temporary' array slices! We can refer to so-called
# array axes: axis=0 averages down the rows, giving one mean per column.

print(numpy.mean(data, axis=0))


[  0.           0.45         1.11666667   1.75         2.43333333   3.15
   3.8          3.88333333   5.23333333   5.51666667   5.95         5.9
   8.35         7.73333333   8.36666667   9.5          9.58333333
  10.63333333  11.56666667  12.35        13.25        11.96666667
  11.03333333  10.16666667  10.           8.66666667   9.15         7.25
   7.33333333   6.58333333   6.06666667   5.95         5.11666667   3.6
   3.3          3.56666667   2.48333333   1.5          1.13333333
   0.56666667]

In [27]:
# Print the full array again before plotting it. The single-argument
# print(...) form works on both Python 2 and Python 3.
print(data)


[[ 0.  0.  1. ...,  3.  0.  0.]
 [ 0.  1.  2. ...,  1.  0.  1.]
 [ 0.  1.  1. ...,  2.  1.  1.]
 ..., 
 [ 0.  1.  1. ...,  1.  1.  1.]
 [ 0.  0.  0. ...,  0.  2.  0.]
 [ 0.  0.  1. ...,  1.  1.  0.]]

In [28]:
import matplotlib.pyplot

In [29]:
# IPython magic: render matplotlib figures inline, directly below the cell
# that creates them.
%matplotlib inline

In [30]:
# Draw the whole array as an image, with colour encoding each value; the
# object returned by imshow is kept in `image`.
image = matplotlib.pyplot.imshow(data)



In [31]:
# Average temperature over time: take the mean of each column, i.e. collapse
# the rows (axis 0) into a single value per column.

avg_temperature = data.mean(axis=0)

In [32]:
# Line plot of the per-column averages; the value returned by plot() is kept
# in avg_plot.
avg_plot = matplotlib.pyplot.plot(avg_temperature)



In [33]:
# Largest value in each column (rows collapsed along axis 0), drawn as a
# line plot.
max_temperature = data.max(axis=0)
max_plot = matplotlib.pyplot.plot(max_temperature)



In [34]:
# Smallest value in each column (rows collapsed along axis 0), drawn as a
# line plot.
min_temperature = data.min(axis=0)
min_plot = matplotlib.pyplot.plot(min_temperature)



In [ ]:


In [ ]:


In [ ]: