Analysing tabular data

We are going to use a *library* called `numpy`.



In [16]:

    
import numpy



In [17]:

    
# Parse the comma-separated file into an array. The result is not stored:
# as the cell's final expression it is simply echoed as the cell output.
numpy.loadtxt('data/weather-01.csv', delimiter=',')









    Out[17]:





array([[ 0.,  0.,  1., ...,  3.,  0.,  0.],
       [ 0.,  1.,  2., ...,  1.,  0.,  1.],
       [ 0.,  1.,  1., ...,  2.,  1.,  1.],
       ..., 
       [ 0.,  1.,  1., ...,  1.,  1.,  1.],
       [ 0.,  0.,  0., ...,  0.,  2.,  0.],
       [ 0.,  0.,  1., ...,  1.,  1.,  0.]])

Variables



In [18]:

    
# Bind the name weight_kg to an integer value.
weight_kg = 55



In [19]:

    
# Show the value currently bound to weight_kg.
print(weight_kg)



In [20]:

    
# Scale the kilogram value by 2.2 and print it after a label; print() puts a
# space between its arguments, hence the two spaces after the colon.
print('Weight in pounds: ', 2.2 * weight_kg)









    



Weight in pounds:  121.00000000000001



In [21]:

    
# Rebind weight_kg to a new value; the name now refers to a float.
weight_kg = 57.5



In [22]:

    
# Same conversion as before, now picking up the updated weight_kg.
print('Weight in pounds: ', 2.2 * weight_kg)









    



Weight in pounds:  126.50000000000001



In [23]:

    
%whos









    



Variable          Type       Data/Info
--------------------------------------
data              ndarray    60x40: 2400 elems, type `float64`, 19200 bytes
matplotlib        module     <module 'matplotlib' from<...>matplotlib\\__init__.py'>
min_plot          list       n=1
min_temperature   ndarray    40: 40 elems, type `float64`, 320 bytes
numpy             module     <module 'numpy' from 'C:\<...>ges\\numpy\\__init__.py'>
weight_kg         float      57.5



In [24]:

    
# Load the comma-separated file again, this time keeping the resulting array
# under the name `data` so later cells can work with it.
data = numpy.loadtxt('data/weather-01.csv', delimiter=',')



In [25]:

    
# Print the array; numpy abbreviates large arrays with "..." in the output.
print(data)









    



[[ 0.  0.  1. ...,  3.  0.  0.]
 [ 0.  1.  2. ...,  1.  0.  1.]
 [ 0.  1.  1. ...,  2.  1.  1.]
 ..., 
 [ 0.  1.  1. ...,  1.  1.  1.]
 [ 0.  0.  0. ...,  0.  2.  0.]
 [ 0.  0.  1. ...,  1.  1.  0.]]



In [26]:

    
# Ask Python what kind of object `data` refers to.
data_type = None  # placeholder removed below to avoid leaving a new global
del data_type
print(type(data))









    



<class 'numpy.ndarray'>



In [27]:

    
%whos









    



Variable          Type       Data/Info
--------------------------------------
data              ndarray    60x40: 2400 elems, type `float64`, 19200 bytes
matplotlib        module     <module 'matplotlib' from<...>matplotlib\\__init__.py'>
min_plot          list       n=1
min_temperature   ndarray    40: 40 elems, type `float64`, 320 bytes
numpy             module     <module 'numpy' from 'C:\<...>ges\\numpy\\__init__.py'>
weight_kg         float      57.5



In [28]:

    
# The dtype attribute reports the type of the elements stored in the array.
print(data.dtype)









    



float64



In [29]:

    
# The shape attribute reports the array's size along each dimension.
print(data.shape)



In [30]:

    
# This is 60 rows * 40 columns



In [31]:

    
# Pick out a single element by [row, column]; indices start at 0.
print("First value in data: ", data[0, 0])









    



First value in data:  0.0



In [32]:

    
# Element at row index 30, column index 20.
print("A value from a selected row and column position: ", data[30, 20])









    



A value from a selected row and column position:  13.0



In [33]:

    
# Let's take the first 10 columns of the first 4 rows.
# A slice X:Y starts at index X and stops just before index Y.
print(data[0:4, 0:10])









    



[[ 0.  0.  1.  3.  1.  2.  4.  7.  8.  3.]
 [ 0.  1.  2.  1.  2.  1.  3.  2.  2.  6.]
 [ 0.  1.  1.  3.  3.  2.  6.  2.  5.  9.]
 [ 0.  0.  2.  0.  4.  2.  2.  1.  6.  7.]]



In [34]:

    
# A slice does not have to begin at index 0.
print(data[3:8, 4:7])









    



[[ 4.  2.  2.]
 [ 3.  1.  3.]
 [ 2.  4.  2.]
 [ 4.  2.  2.]
 [ 3.  1.  2.]]



In [35]:

    
# Either slice bound may be left out: a missing start means "from index 0"
# and a missing stop means "through the end of that axis".
smallchunk = data[:3, 36:]
print(smallchunk)









    



[[ 2.  3.  0.  0.]
 [ 1.  1.  0.  1.]
 [ 2.  2.  1.  1.]]



In [36]:

    
# Array arithmetic: multiplying by a scalar scales every element.
doublesmallchunk = 2.0 * smallchunk



In [37]:

    
# Each value is twice the corresponding value in smallchunk.
print(doublesmallchunk)









    



[[ 4.  6.  0.  0.]
 [ 2.  2.  0.  2.]
 [ 4.  4.  2.  2.]]



In [38]:

    
# Adding two arrays of the same shape combines them element by element.
triplesmallchunk = doublesmallchunk + smallchunk



In [39]:

    
# Each value is three times the corresponding value in smallchunk.
print(triplesmallchunk)









    



[[ 6.  9.  0.  0.]
 [ 3.  3.  0.  3.]
 [ 6.  6.  3.  3.]]



In [40]:

    
# With no axis given, the mean is taken over every element of the array.
print(numpy.mean(data))



In [41]:

    
# Largest single value anywhere in the array.
print(numpy.max(data))



In [42]:

    
# Smallest single value anywhere in the array.
print(numpy.min(data))

0.0



In [43]:

    
# Row 0 holds the readings for the first station; ':' keeps every column.
station_0 = data[0, :]



In [44]:

    
# Largest value within the station_0 slice.
print(numpy.max(station_0))



In [45]:

    
# We don't need to create these 'temporary' array slices.
# Instead, we can tell numpy which array axis to operate along.



In [46]:

    
# axis=0 collapses the rows, giving one mean per column.
print(numpy.mean(data, axis=0))









    



[  0.           0.45         1.11666667   1.75         2.43333333   3.15
   3.8          3.88333333   5.23333333   5.51666667   5.95         5.9
   8.35         7.73333333   8.36666667   9.5          9.58333333
  10.63333333  11.56666667  12.35        13.25        11.96666667
  11.03333333  10.16666667  10.           8.66666667   9.15         7.25
   7.33333333   6.58333333   6.06666667   5.95         5.11666667   3.6
   3.3          3.56666667   2.48333333   1.5          1.13333333
   0.56666667]



In [47]:

    
# axis=1 collapses the columns, giving one mean per row.
print(numpy.mean(data, axis=1))









    



[ 5.45   5.425  6.1    5.9    5.55   6.225  5.975  6.65   6.625  6.525
  6.775  5.8    6.225  5.75   5.225  6.3    6.55   5.7    5.85   6.55
  5.775  5.825  6.175  6.1    5.8    6.425  6.05   6.025  6.175  6.55
  6.175  6.35   6.725  6.125  7.075  5.725  5.925  6.15   6.075  5.75
  5.975  5.725  6.3    5.9    6.75   5.925  7.225  6.15   5.95   6.275  5.7
  6.1    6.825  5.975  6.725  5.7    6.25   6.4    7.05   5.9  ]



In [48]:

    
# axis = 0 means calculate down each column (i.e. mean of the values in a column)
# axis = 1 means calculate mean across the rows (i.e. mean of the values in a row)



In [49]:

    
import matplotlib.pyplot



In [50]:

    
%matplotlib inline



In [51]:

    
# Render the whole 2-D array as an image, one coloured cell per value.
# Assigning the result keeps its repr out of the cell output.
image = matplotlib.pyplot.imshow(data)



In [52]:

    
# Average temperature over time: take the mean down each column (axis=0),
# which yields one value per column.
avg_temperature = numpy.mean(data, axis=0)



In [53]:

    
# Line plot of the per-column averages held in avg_temperature.
# The title and axis labels let the figure stand on its own; the trailing
# semicolon stops the final call's Text(...) repr being echoed as output.
avg_plot = matplotlib.pyplot.plot(avg_temperature)
matplotlib.pyplot.xlabel('Time step (column index)')
matplotlib.pyplot.ylabel('Average temperature')
matplotlib.pyplot.title('Average temperature over time');



In [54]:

    
# Plot the minimum temperature over time: take the smallest value in each
# column (axis=0) and draw the result as a line.
min_temperature = numpy.min(data, axis=0)
min_plot = matplotlib.pyplot.plot(min_temperature)
# Label the figure so it stands on its own; the trailing semicolon stops the
# final call's Text(...) repr being echoed as output.
matplotlib.pyplot.xlabel('Time step (column index)')
matplotlib.pyplot.ylabel('Minimum temperature')
matplotlib.pyplot.title('Minimum temperature over time');



In [55]:

    
# Plot the maximum temperature over time: take the largest value in each
# column (axis=0) and draw the result as a line.
max_temperature = numpy.max(data, axis=0)
max_plot = matplotlib.pyplot.plot(max_temperature)
# Label the figure so it stands on its own; the trailing semicolon stops the
# final call's Text(...) repr being echoed as output.
matplotlib.pyplot.xlabel('Time step (column index)')
matplotlib.pyplot.ylabel('Maximum temperature')
matplotlib.pyplot.title('Maximum temperature over time');



In [ ]:



In [ ]: