## analysing tabular data

``````

In [ ]:

import numpy

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

``````

# variables

``````

In [ ]:

# A sample body weight in kilograms, reused by the cells that follow.
weight_kg = 55

``````
``````

In [ ]:

# Display the value currently bound to weight_kg.
print(weight_kg)

``````
``````

In [ ]:

# 2.2 is the kilogram-to-pound factor used throughout these notes.
print('weight in pounds:', weight_kg * 2.2)

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

%whos

``````
``````

In [ ]:

``````
``````

In [ ]:

%whos

``````
``````

In [ ]:

%whos

``````
``````

In [ ]:

# dtype reports the type of the elements held in the array
# (assumes `data` is a NumPy array created in an earlier cell).
print(data.dtype)

``````
``````

In [ ]:

# shape is a tuple giving the array's size along each dimension.
print(data.shape)

``````

# this is 60 by 40

``````

In [ ]:

# Indices start at 0, so [0, 0] is the first row and first column.
print("first value in data:", data[0, 0])

``````
``````

In [ ]:

# Row index 30, column index 20.
print('A middle value:', data[30, 20])

``````

# let's get the first 10 columns for the first 4 rows

print(data[0:4, 0:10])

# start at index 0 and go up to but not including index 4

``````

In [ ]:

# Rows 0-3 and columns 0-9: the end index of a slice is excluded.
print(data[0:4, 0:10])

``````

# we don't need to start slicing at 0

``````

In [ ]:

# Rows 5-9 and columns 7-14: a slice may start anywhere, not only at 0.
print(data[5:10, 7:15])

``````

# we don't even need to include the upper and lower limits

``````

In [ ]:

# An omitted lower bound means "from the start"; an omitted upper bound
# means "to the end". This takes the first 3 rows, columns 36 onwards.
smallchunk = data[:3, 36:]
print(smallchunk)

``````

# arithmetic on arrays

``````

In [ ]:

# Multiplying an array by a scalar multiplies every element.
doublesmallchunk = smallchunk * 2.0

``````
``````

In [ ]:

# Each element should be twice the corresponding element of smallchunk.
print(doublesmallchunk)

``````
``````

In [ ]:

# Adding two arrays of the same shape adds them element by element.
triplesmallchunk = smallchunk + doublesmallchunk

``````
``````

In [ ]:

# smallchunk + 2 * smallchunk, i.e. three times each original element.
print(triplesmallchunk)

``````
``````

In [ ]:

# With no axis given, numpy.mean averages over every element of the array.
print(numpy.mean(data))

``````
``````

In [ ]:

# Largest single value anywhere in the array.
print(numpy.max(data))

``````
``````

In [ ]:

# Smallest single value anywhere in the array.
print(numpy.min(data))

``````

# this is shorthand for "all the columns"

``````

In [ ]:

# Row 0 with every column: a bare ":" selects the whole axis.
station_0 = data[0, :]

``````
``````

In [ ]:

# Largest value in the first row (station_0 was sliced as data[0, :]).
print(numpy.max(station_0))

``````

# we can refer to what we call array axes

``````

In [ ]:

# axis=0 averages down the rows, producing one mean per column.
print(numpy.mean(data, axis=0))

``````
``````

In [ ]:

# axis=1 averages across the columns, producing one mean per row.
print(numpy.mean(data, axis=1))

``````

# do some simple visualisations

``````

In [ ]:

import matplotlib.pyplot

``````
``````

In [ ]:

%matplotlib inline

``````
``````

In [ ]:

# Render the whole array as an image; the returned handle is kept in `image`.
image = matplotlib.pyplot.imshow(data)

``````

# let's look at the average temperature over time

``````

In [ ]:

# One mean per column (axis=0 collapses the rows).
avg_temperature = numpy.mean(data, axis=0)

``````
``````

In [ ]:

# Line plot of the per-column means computed above.
avg_plot = matplotlib.pyplot.plot(avg_temperature)

``````
``````

In [ ]:

import numpy

``````
``````

In [ ]:

import matplotlib.pyplot

``````
``````

In [ ]:

%matplotlib inline

``````
``````

In [ ]:

``````

# create a wide figure to hold sub plots

``````

In [ ]:

# figsize is (width, height): a wide, short canvas for side-by-side plots.
fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

``````

# create placeholders for plots

``````

In [ ]:

``````
``````

In [ ]:

# A wide figure that holds three plots side by side.
fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

# Create the placeholders before drawing into them:
# a grid of 1 row by 3 columns, filled at positions 1, 2 and 3.
subplot1 = fig.add_subplot(1, 3, 1)
subplot2 = fig.add_subplot(1, 3, 2)
subplot3 = fig.add_subplot(1, 3, 3)

# axis=0 collapses the rows, so each plot has one point per column of `data`.
subplot1.set_ylabel('average')
subplot1.plot(numpy.mean(data, axis=0))

subplot2.set_ylabel('minimum')
subplot2.plot(numpy.min(data, axis=0))

subplot3.set_ylabel('maximum')
subplot3.plot(numpy.max(data, axis=0))

``````

# loops

``````

In [ ]:

# Strings are indexed from 0, so index 4 is the fifth character.
word = 'notebook'
print(word[4])

``````

# see above: note the difference between square and normal brackets

``````

In [ ]:

for char in word:
    # The colon that ends the `for` line and the indentation of the body are
    # both required; the convention is 4 spaces per indentation level.
    print(char)

``````
``````

In [ ]:

# Visit the characters of `word` one at a time, printing each on its own line.
for char in word:
    print(char)

``````

# get a list of all the filenames from disk

``````

In [ ]:

import glob

``````

# glob is short for "global": glob.glob finds files whose names match a pattern

``````

In [ ]:

# glob.glob returns a list of the paths that match the wildcard pattern;
# the "*" stands for any run of characters in the file name.
print(glob.glob('data/weather*.csv'))

``````

# putting it all together

``````

In [ ]:

# Sort the matches so the files are processed in a predictable order,
# then keep only the first three.
filenames = sorted(glob.glob('data/weather*.csv'))
filenames = filenames[0:3]

for f in filenames:
    print(f)

    # Everything below belongs to the loop body, so it is indented one level.

    # Load this file's values so that each figure reflects the current file.
    # Assumes the CSV holds only comma-separated numbers, with no header row
    # -- confirm against the data files.
    data = numpy.loadtxt(fname=f, delimiter=',')

    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

    # Three placeholders side by side: 1 row, 3 columns, positions 1 to 3.
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # show must be called; the bare name `matplotlib.pyplot.show` does nothing.
    matplotlib.pyplot.show()

``````
``````

In [ ]:

``````
``````

In [ ]:

num = 37
if num > 100:
    print('greater')
else:
    print('not greater')
# Back at the outer indentation level, so this runs whichever branch was taken.
print('done')

``````
``````

In [ ]:

num = 107
if num > 100:
    print('greater')
else:
    print('not greater')
    # Indented under `else`, so this line is skipped whenever num > 100.
    print('done')

``````

# didn't print "done" because of the break in the indentation sequence

``````

In [ ]:

num = -3

# Exactly one branch runs: the first condition that is true wins, and the
# final `else` catches everything the earlier tests did not.
if num > 0:
    print(num, "is positive")
elif num == 0:
    print(num, "is zero")
else:
    print(num, "is negative")

``````

# elif means "else if"; it is always good to finish a chain with an else

``````

In [ ]:

# Collect the matching paths in sorted order.
filenames = sorted(glob.glob('data/weather*.csv'))

``````
``````

In [ ]:

# Sort the matches so the files are processed in a predictable order,
# then keep only the first three.
filenames = sorted(glob.glob('data/weather*.csv'))
filenames = filenames[0:3]

for f in filenames:
    print(f)

    # Load this file's values so the checks and plots describe the current
    # file. Assumes the CSV holds only comma-separated numbers, with no header
    # row -- confirm against the data files.
    data = numpy.loadtxt(fname=f, delimiter=',')

    # Per-column maxima, computed once and reused by both comparisons.
    column_max = numpy.max(data, axis=0)

    if column_max[0] == 0 and column_max[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        # This branch had no body, which is a syntax error; report the finding.
        print('minima add up to zero')
    else:
        print('data looks ok')

    # The plotting code is part of the loop body, so it is indented too.
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

    # Three placeholders side by side: 1 row, 3 columns, positions 1 to 3.
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(column_max)

    fig.tight_layout()
    # show must be called; the bare name `matplotlib.pyplot.show` does nothing.
    matplotlib.pyplot.show()

``````

# something went wrong with the above

``````

In [ ]:

def fahr_to_kelvin(temp):
    """Convert a temperature from degrees Fahrenheit to kelvin.

    temp: the temperature in degrees Fahrenheit.

    Returns the temperature in kelvin: the Fahrenheit value is shifted by 32,
    scaled by 5/9 and then offset by 273.15.
    """
    return (temp - 32) * (5 / 9) + 273.15

``````
``````

In [ ]:

# 32 degrees Fahrenheit, converted to kelvin.
print('freezing point of water:', fahr_to_kelvin(32))

``````
``````

In [ ]:

# 212 degrees Fahrenheit, converted to kelvin.
print('boiling point of water:', fahr_to_kelvin(212))

``````

# using functions

``````

In [ ]:

def analyse(filename):
    """Placeholder left unfinished in these notes; it does nothing yet.

    The docstring serves as the body so that the cell is valid Python.
    A complete definition of analyse appears in a later cell.
    """

``````

# unfinished

``````

In [ ]:

def detect_problems(filename):
    """Print a one-line verdict on the values stored in a data file.

    filename: path to a comma-separated file of numbers. It is assumed to
        hold only numeric values, with no header row, and at least 21
        columns (column index 20 is inspected) -- confirm against the files.

    The function prints its finding and does not return any data.
    """
    # Read the file named by the argument instead of relying on a global.
    data = numpy.loadtxt(fname=filename, delimiter=',')

    # Per-column maxima, computed once and reused by both comparisons.
    column_max = numpy.max(data, axis=0)

    if column_max[0] == 0 and column_max[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        # This branch had no body, which is a syntax error; report the finding.
        print('minima add up to zero')
    else:
        print('data looks ok')

``````
``````

In [ ]:

# Report, plot and check each of the first five files in turn.
for f in filenames[0:5]:
    print(f)
    analyse(f)
    detect_problems(f)

``````
``````

In [ ]:

def analyse(filename):
    """Plot the per-column average, minimum and maximum of one data file.

    filename: path to a comma-separated file of numbers. It is assumed to
        hold only numeric values, with no header row -- confirm against the
        data files.

    The figure is displayed; nothing is returned.
    """
    # Read the file named by the argument instead of relying on a global.
    data = numpy.loadtxt(fname=filename, delimiter=',')

    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

    # Three placeholders side by side: 1 row, 3 columns, positions 1 to 3.
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # show must be called; the bare name `matplotlib.pyplot.show` does nothing.
    matplotlib.pyplot.show()

``````
``````

In [ ]:

# Report, plot and check each of the first five files in turn.
for f in filenames[0:5]:
    print(f)
    analyse(f)
    detect_problems(f)

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

``````
``````

In [ ]:

# help() prints the function's signature together with its docstring, if any.
help(detect_problems)

``````
``````

In [ ]:

"""some of our temperature files haave problems, check for these

this function reads a file and reports on odd looking maxima and minimia that add to zero
the function does not return any data
"""

def detect_problems(filename):
    """Check a temperature file for known problems.

    Some of our temperature files have problems. This function reads a file
    and reports on odd-looking maxima and on minima that add up to zero.

    filename: path to a comma-separated file of numbers. It is assumed to
        hold only numeric values, with no header row, and at least 21
        columns (column index 20 is inspected) -- confirm against the files.

    The function prints its finding and does not return any data.
    """
    # The docstring has to be the first statement inside the function for
    # help() to display it.

    # Read the file named by the argument instead of relying on a global.
    data = numpy.loadtxt(fname=filename, delimiter=',')

    # Per-column maxima, computed once and reused by both comparisons.
    column_max = numpy.max(data, axis=0)

    if column_max[0] == 0 and column_max[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        # This branch had no body, which is a syntax error; report the finding.
        print('minima add up to zero')
    else:
        print('data looks ok')

``````
``````

In [ ]:

def analyse(filename):
    """Analyse a dataset and output plots of its average, minimum and maximum.

    filename: path to a comma-separated file of numbers. It is assumed to
        hold only numeric values, with no header row -- confirm against the
        data files.

    The figure is displayed; nothing is returned.
    """
    # Read the file named by the argument instead of relying on a global.
    data = numpy.loadtxt(fname=filename, delimiter=',')

    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

    # Three placeholders side by side: 1 row, 3 columns, positions 1 to 3.
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # show must be called; the bare name `matplotlib.pyplot.show` does nothing.
    matplotlib.pyplot.show()

``````
``````

In [ ]:

``````