Analysing tabular data
We are going to use a library called NumPy.
In [4]:
    
import numpy
    
In [5]:
    
# Load the CSV file; the result is not assigned to a name, so the notebook
# only echoes it.
numpy.loadtxt(fname='data/weather-01.csv', delimiter=',')
    
    Out[5]:
In [6]:
    
# Store a weight in kilograms under a name so it can be reused below.
weight_kg = 55
    
In [7]:
    
# Show the value currently bound to weight_kg.
print(weight_kg)
    
    
In [8]:
    
# Convert kilograms to pounds with the factor 2.2 and print the result.
print('Weight in pounds: ', weight_kg * 2.2)
    
    
In [9]:
    
# Rebind the name to a new value; the old value is replaced.
weight_kg = 57.5
    
In [10]:
    
# The conversion now uses the updated weight_kg.
print('New weight:', weight_kg * 2.2)
    
    
In [11]:
    
# IPython magic: list the variables currently defined in the session.
%whos
    
    
In [12]:
    
# Load the CSV file again, this time keeping the array in a variable.
data = numpy.loadtxt(fname='data/weather-01.csv', delimiter=',')
    
In [13]:
    
# Print the loaded array.
print(data)
    
    
In [14]:
    
# Print the Python type of the object that data refers to.
print(type(data))
    
    
In [15]:
    
# IPython magic: list the variables currently defined in the session.
%whos
    
    
In [16]:
    
# Find out the data type of the array's elements.
print(data.dtype)
    
    
In [17]:
    
# Find out the shape of the array (its size along each axis).
print(data.shape)
    
    
In [18]:
    
# this is 60 rows * 40 columns
    
In [19]:
    
# Get a single number out of the array by giving a row and a column index.
print("First value in data:", data[0, 0])
    
    
In [20]:
    
# Index row 30, column 20.
print('A middle value:', data[30, 20])
    
    
In [21]:
    
# Get a slice out of the array:
# the first 10 columns of the first 4 rows.
# A slice such as 0:4 starts at index 0 and goes up to
# BUT NOT INCLUDING index 4.
print(data[0:4, 0:10])
    
    
In [22]:
    
# We don't need to start slicing at 0.
print(data[5:10, 7:15])
    
    
In [23]:
    
# We don't need to include the lower and upper bounds of a slice: an omitted lower bound defaults to the beginning of the axis and an omitted upper bound defaults to its end (the example above gave explicit lower bounds of 5 and 7).
    
In [24]:
    
# Rows from the start up to (not including) index 3,
# columns from index 36 through to the end.
smallchunk = data[:3, 36:]
    
In [25]:
    
# Print the extracted chunk.
print(smallchunk)
    
    
In [26]:
    
# Arithmetic on arrays: multiplying by a number applies to every element.
doublesmallchunk = smallchunk * 2.0
    
In [27]:
    
# Print the doubled chunk.
print(doublesmallchunk)
    
    
In [26]:
    
# Adding the chunk to its doubled copy gives three times the original values.
triplesmallchunk = smallchunk + doublesmallchunk
    
In [27]:
    
# Print the tripled chunk.
print(triplesmallchunk)
    
    
In [28]:
    
# Mean of every value in the array.
print(numpy.mean(data))
    
    
In [29]:
    
# Largest value in the array.
print(numpy.max(data))
    
    
In [30]:
    
# Smallest value in the array.
print(numpy.min(data))
    
    
In [31]:
    
# Do stuff down columns or across rows.
# Get the set of data for the first station.
station_0 = data[0, :]
# Everything for row 0: all the columns of row 0.
    
In [32]:
    
# Print the first station's data. print is called as a function, as in
# every other cell; the bare `print x` statement is a SyntaxError in Python 3.
print(station_0)
    
    
In [33]:
    
# Largest value recorded in the first station's row.
print(numpy.max(station_0))
    
    
In [34]:
    
# We don't need to create 'temporary' array slices;
# we can refer to what we call array axes instead.
    
In [35]:
    
# Mean taken along axis 0, i.e. down each column.
print(numpy.mean(data, axis=0))
    
    
In [36]:
    
# Mean taken along axis 1, i.e. across each row.
print(numpy.mean(data, axis=1))
    
    
In [37]:
    
# axis=0 gives the mean down each column, so the mean temperature for each recording period
# axis=1 gives the mean across each row, so the mean temperature for each station over all the periods
    
In [38]:
    
#do some simple visualisations
    
In [28]:
    
import matplotlib.pyplot
    
In [29]:
    
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline
    
In [30]:
    
# Display the whole array as an image.
image = matplotlib.pyplot.imshow(data)
    
    
In [31]:
    
# Let's look at the average temperature over time:
# the mean down each column (axis 0).
avg_temperature = numpy.mean(data, axis=0)
    
In [32]:
    
# Line plot of the per-column averages.
avg_plot = matplotlib.pyplot.plot(avg_temperature)
    
    
In [33]:
    
# Minimum down each column (axis 0).
min_temperature = numpy.min(data, axis=0)
    
In [34]:
    
import numpy
    
In [ ]:
    
    
In [35]:
    
# Line plot of the per-column minima.
min_plot = matplotlib.pyplot.plot(min_temperature)
    
    
In [36]:
    
# Maximum down each column (axis 0).
max_temperature = numpy.max(data, axis=0)
    
In [37]:
    
# Line plot of the per-column maxima.
max_plot = matplotlib.pyplot.plot(max_temperature)
    
    
In [ ]: