analysig tabular data

we are going to use a library called numpy



In [4]:

    
import numpy



In [5]:

    
numpy.loadtxt(fname='data/weather-01.csv', delimiter = ',')









    Out[5]:





array([[ 0.,  0.,  1., ...,  3.,  0.,  0.],
       [ 0.,  1.,  2., ...,  1.,  0.,  1.],
       [ 0.,  1.,  1., ...,  2.,  1.,  1.],
       ..., 
       [ 0.,  1.,  1., ...,  1.,  1.,  1.],
       [ 0.,  0.,  0., ...,  0.,  2.,  0.],
       [ 0.,  0.,  1., ...,  1.,  1.,  0.]])

variables



In [6]:

    
weight_kg=55



In [7]:

    
print (weight_kg)



In [8]:

    
print ('Weight in pounds: ', weight_kg*2.2)









    



Weight in pounds:  121.00000000000001



In [9]:

    
weight_kg=57.5



In [10]:

    
print ('New weight:', weight_kg*2.2)









    



New weight: 126.50000000000001



In [11]:

    
%whos









    



Variable    Type      Data/Info
-------------------------------
numpy       module    <module 'numpy' from 'C:\<...>ges\\numpy\\__init__.py'>
weight_kg   float     57.5



In [12]:

    
data = numpy.loadtxt(fname='data/weather-01.csv', delimiter = ',')



In [13]:

    
print (data
      )









    



[[ 0.  0.  1. ...,  3.  0.  0.]
 [ 0.  1.  2. ...,  1.  0.  1.]
 [ 0.  1.  1. ...,  2.  1.  1.]
 ..., 
 [ 0.  1.  1. ...,  1.  1.  1.]
 [ 0.  0.  0. ...,  0.  2.  0.]
 [ 0.  0.  1. ...,  1.  1.  0.]]



In [14]:

    
print (type(data
           ))









    



<class 'numpy.ndarray'>



In [15]:

    
%whos









    



Variable    Type       Data/Info
--------------------------------
data        ndarray    60x40: 2400 elems, type `float64`, 19200 bytes
numpy       module     <module 'numpy' from 'C:\<...>ges\\numpy\\__init__.py'>
weight_kg   float      57.5



In [16]:

    
# finding out the data type
print (data.dtype)









    



float64



In [17]:

    
#find out the shape 
print (data.shape)



In [18]:

    
# this is 60 rows * 40 columns



In [19]:

    
#getting a single number out of the array
print ("First value in data:", data[0,0])









    



First value in data: 0.0



In [20]:

    
print ('A middle value:', data [30,20])









    



A middle value: 13.0



In [21]:

    
#get a slice out of an array
#lets get the first 10 column for the first 4 rows
print(data[0:4, 0:10
        ])
# start at index 0 and go up to BUT NOT INCLUDING index 4









    



[[ 0.  0.  1.  3.  1.  2.  4.  7.  8.  3.]
 [ 0.  1.  2.  1.  2.  1.  3.  2.  2.  6.]
 [ 0.  1.  1.  3.  3.  2.  6.  2.  5.  9.]
 [ 0.  0.  2.  0.  4.  2.  2.  1.  6.  7.]]



In [22]:

    
#we don t need to start slicing at 0
print (data[5:10, 7:15])









    



[[  1.   6.   4.   7.   6.   6.   9.   9.]
 [  5.   5.   8.   6.   5.  11.   9.   4.]
 [  3.   5.   3.   7.   8.   8.   5.  10.]
 [  5.   5.   8.   2.   4.  11.  12.  10.]
 [  3.   5.   8.   6.   8.  12.   5.  13.]]



In [23]:

    
# we don t need to include the upper and lower bounds (it automaticlly assumes the begining and the end respectively , in the example above 5 and 7 respectively)



In [24]:

    
smallchunk = data[:3, 36:]



In [25]:

    
print (smallchunk
      )









    



[[ 2.  3.  0.  0.]
 [ 1.  1.  0.  1.]
 [ 2.  2.  1.  1.]]



In [26]:

    
#arithmetic on arrays
doublesmallchunk = smallchunk*2.0



In [27]:

    
print(doublesmallchunk)









    



[[ 4.  6.  0.  0.]
 [ 2.  2.  0.  2.]
 [ 4.  4.  2.  2.]]



In [26]:

    
triplesmallchunk = smallchunk + doublesmallchunk



In [27]:

    
print (triplesmallchunk)









    



[[ 6.  9.  0.  0.]
 [ 3.  3.  0.  3.]
 [ 6.  6.  3.  3.]]



In [28]:

    
print (numpy.mean(data))



In [29]:

    
print (numpy.max(data))



In [30]:

    
print (numpy.min(data))

0.0



In [31]:

    
# do stuff dow column or across rows
#get a set of data for the first station
station_0 = data[0, :]
# everything for row 0, all the columns for row 0



In [32]:

    
print station_0









    



  File "<ipython-input-32-bbb1493e11c5>", line 1
    print station_0
                  ^
SyntaxError: Missing parentheses in call to 'print'



In [33]:

    
print (numpy.max(station_0))



In [34]:

    
#we don t need to create 'temporary'arrat slices
#we can refer to what we call array axes



In [35]:

    
print (numpy.mean(data, axis=0))









    



[  0.           0.45         1.11666667   1.75         2.43333333   3.15
   3.8          3.88333333   5.23333333   5.51666667   5.95         5.9
   8.35         7.73333333   8.36666667   9.5          9.58333333
  10.63333333  11.56666667  12.35        13.25        11.96666667
  11.03333333  10.16666667  10.           8.66666667   9.15         7.25
   7.33333333   6.58333333   6.06666667   5.95         5.11666667   3.6
   3.3          3.56666667   2.48333333   1.5          1.13333333
   0.56666667]



In [36]:

    
print (numpy.mean(data, axis=1))









    



[ 5.45   5.425  6.1    5.9    5.55   6.225  5.975  6.65   6.625  6.525
  6.775  5.8    6.225  5.75   5.225  6.3    6.55   5.7    5.85   6.55
  5.775  5.825  6.175  6.1    5.8    6.425  6.05   6.025  6.175  6.55
  6.175  6.35   6.725  6.125  7.075  5.725  5.925  6.15   6.075  5.75
  5.975  5.725  6.3    5.9    6.75   5.925  7.225  6.15   5.95   6.275  5.7
  6.1    6.825  5.975  6.725  5.7    6.25   6.4    7.05   5.9  ]



In [37]:

    
# axis= 0 is the mean down each column so the mean teperature for each recording period
# axis=1 we get the mean down each row, so the mean temperature for each station for all the periods



In [38]:

    
#do some simple visualisations



In [28]:

    
import matplotlib.pyplot



In [29]:

    
%matplotlib inline



In [30]:

    
image= matplotlib.pyplot.imshow(data)



In [31]:

    
#let's look at the average temp over time
avg_temperature= numpy.mean(data, axis= 0)



In [32]:

    
avg_plot =  matplotlib.pyplot.plot(avg_temperature)



In [33]:

    
min_temperature= numpy.min(data, axis=0)



In [34]:

    
import numpy



In [ ]:



In [35]:

    
min_plot=matplotlib.pyplot.plot(min_temperature)



In [36]:

    
max_temperature= numpy.max(data, axis=0)



In [37]:

    
max_plot= matplotlib.pyplot.plot (max_temperature)



In [ ]: