## analysing tabular data

In [ ]:

import numpy

In [ ]:

In [ ]:

In [ ]:

In [ ]:

# variables

In [ ]:

weight_kg=55

In [ ]:

print (weight_kg)

In [ ]:

print('weight in pounds:',weight_kg*2.2)

In [ ]:

In [ ]:

In [ ]:

In [ ]:

%whos

In [ ]:

In [ ]:

%whos

In [ ]:

%whos

In [ ]:

print(data.dtype)

In [ ]:

print(data.shape)

# The array has 60 rows and 40 columns

``````

In [ ]:

print ("first value in data:",data [0,0])

In [ ]:

print ('A middle value:',data[30,20])

``````

# Let's get the first 10 columns for the first 4 rows

print(data[0:4, 0:10])

# start at index 0 and go up to but not including index 4

In [ ]:

print (data[0:4, 0:10])

# We don't need to start slicing at 0

In [ ]:

print (data[5:10,7:15])

# We don't even need to include the upper and lower limits

In [ ]:

smallchunk=data[:3,36:]
print(smallchunk)

# arithmetic on arrays

In [ ]:

doublesmallchunk=smallchunk*2.0

In [ ]:

print(doublesmallchunk)

In [ ]:

triplesmallchunk=smallchunk+doublesmallchunk

In [ ]:

print(triplesmallchunk)

In [ ]:

print(numpy.mean(data))

In [ ]:

print (numpy.max(data))

In [ ]:

print (numpy.min(data))

# A bare `:` in a slice is shorthand for "all the columns"

In [ ]:

station_0=data[0,:]

In [ ]:

print(numpy.max(station_0))

# we can refer to what we call array axes

In [ ]:

print(numpy.mean(data, axis=0))

In [ ]:

print(numpy.mean(data, axis=1))

# Do some simple visualisations

In [ ]:

import matplotlib.pyplot

In [ ]:

%matplotlib inline

In [ ]:

image=matplotlib.pyplot.imshow(data)

# Let's look at the average temperature over time

In [ ]:

avg_temperature=numpy.mean(data,axis=0)

In [ ]:

avg_plot=matplotlib.pyplot.plot(avg_temperature)

In [ ]:

import numpy

In [ ]:

import matplotlib.pyplot

In [ ]:

%matplotlib inline

In [ ]:

# create a wide figure to hold sub plots

In [ ]:

fig=matplotlib.pyplot.figure (figsize=(10.0,3.0))

# create placeholders for plots

In [ ]:

In [ ]:

# Create a wide figure and three side-by-side axes (1 row x 3 columns) to draw on.
fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

subplot1 = fig.add_subplot(1, 3, 1)
subplot2 = fig.add_subplot(1, 3, 2)
subplot3 = fig.add_subplot(1, 3, 3)

# Each panel shows one statistic of `data` taken along axis 0.
subplot1.set_ylabel('average')
subplot1.plot(numpy.mean(data, axis=0))

subplot2.set_ylabel('minimum')
subplot2.plot(numpy.min(data, axis=0))

subplot3.set_ylabel('maximum')
subplot3.plot(numpy.max(data, axis=0))

# loops

In [ ]:

word='notebook'
print (word[4])

# See above: note the difference between square brackets (indexing) and round brackets (function calls)

In [ ]:

for char in word:
    # The colon ending the `for` line and the indentation of the body are both
    # essential; the conventional indent is 4 spaces.
    print(char)

In [ ]:

# Visit each character of `word` in turn and print it on its own line.
for char in word:
    print(char)

# get a list of all the filenames from disk

In [ ]:

import glob

# `glob` (short for "global") finds all file paths that match a wildcard pattern

In [ ]:

print(glob.glob('data/weather*.csv'))

# putting it all together

In [ ]:

# Plot the average / minimum / maximum curves for the first three data files.
filenames = sorted(glob.glob('data/weather*.csv'))
filenames = filenames[0:3]

for f in filenames:
    print(f)

    # Load this file's values so each figure reflects the file named above it.
    # Assumes comma-separated numeric data -- TODO confirm against the files.
    data = numpy.loadtxt(fname=f, delimiter=',')

    # One wide figure holding three side-by-side axes (1 row x 3 columns).
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # `show` must be called (with parentheses); a bare attribute reference does nothing.
    matplotlib.pyplot.show()

In [ ]:

In [ ]:

# Two-way branch: exactly one of the two indented blocks runs.
num = 37
if num > 100:
    print('greater')
else:
    print('not greater')
# Not indented, so this runs whichever branch was taken.
print('done')

In [ ]:

# Same test with a value above the threshold, so the `if` branch is taken.
num = 107
if num > 100:
    print('greater')
else:
    print('not greater')
    # Indented under `else`, so this line belongs to the `else` block and only
    # runs when that branch is taken.
    print('done')

# Didn't print "done" because of the break in the indentation sequence

In [ ]:

# Three-way branch: the conditions are tested in order and only the first match runs.
num = -3

if num > 0:
    print(num, "is positive")
elif num == 0:
    print(num, "is zero")
else:
    print(num, "is negative")

# `elif` means "else if"; it is always good to finish a chain with an `else`

In [ ]:

filenames=sorted(glob.glob('data/weather*.csv'))

In [ ]:

# For the first three data files: print a data-quality verdict, then plot the
# average / minimum / maximum curves.
filenames = sorted(glob.glob('data/weather*.csv'))
filenames = filenames[0:3]

for f in filenames:
    print(f)

    # Load this file's values so the checks and plots refer to the file just printed.
    # Assumes comma-separated numeric data -- TODO confirm against the files.
    data = numpy.loadtxt(fname=f, delimiter=',')

    column_maxima = numpy.max(data, axis=0)
    if column_maxima[0] == 0 and column_maxima[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        print('minima add up to zero')
    else:
        print('data looks ok')

    # One wide figure holding three side-by-side axes (1 row x 3 columns).
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # `show` must be called (with parentheses); a bare attribute reference does nothing.
    matplotlib.pyplot.show()

# something went wrong with the above

In [ ]:

def fahr_to_kelvin(temp):
    """Convert a temperature from degrees Fahrenheit to kelvin.

    Parameters
    ----------
    temp : number
        Temperature in degrees Fahrenheit.

    Returns
    -------
    float
        ``(temp - 32) * 5/9 + 273.15``.
    """
    # Float literals keep the 5/9 ratio from truncating to 0 under Python 2
    # integer division; the result is unchanged on Python 3.
    return (temp - 32) * (5.0 / 9.0) + 273.15

In [ ]:

print ('freezing point of water:', fahr_to_kelvin(32))

In [ ]:

print ('boiling point of water:', fahr_to_kelvin(212))

# using functions

In [ ]:

def analyse(filename):
    """Unfinished placeholder: accepts a filename but does nothing yet and returns None."""

# unfinished

In [ ]:

def detect_problems(filename):
    """Print a one-line data-quality verdict for a single data file.

    The file is loaded with ``numpy.loadtxt`` and its per-column maxima and
    minima (``axis=0``) are inspected.

    Parameters
    ----------
    filename : str
        Path of the file to check. Assumed to hold comma-separated numeric
        values with at least 21 columns -- TODO confirm against the data files.

    Returns
    -------
    None
        The verdict is printed, not returned.
    """
    # Read the file named by the caller instead of relying on a global `data`.
    data = numpy.loadtxt(fname=filename, delimiter=',')
    column_maxima = numpy.max(data, axis=0)

    if column_maxima[0] == 0 and column_maxima[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        print('minima add up to zero')
    else:
        print('data looks ok')

In [ ]:

# Plot and sanity-check (at most) the first five files in `filenames`.
for f in filenames[0:5]:
    print(f)
    analyse(f)
    detect_problems(f)

In [ ]:

def analyse(filename):
    """Plot the per-column average, minimum and maximum of one data file.

    The file is loaded with ``numpy.loadtxt`` and three line plots are drawn
    side by side in a single wide figure.

    Parameters
    ----------
    filename : str
        Path of the file to plot. Assumed to hold comma-separated numeric
        values -- TODO confirm against the data files.

    Returns
    -------
    None
        The figure is displayed, not returned.
    """
    # Read the file named by the caller instead of relying on a global `data`.
    data = numpy.loadtxt(fname=filename, delimiter=',')

    # One wide figure holding three side-by-side axes (1 row x 3 columns).
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # `show` must be called (with parentheses); a bare attribute reference does nothing.
    matplotlib.pyplot.show()

In [ ]:

# For each of (at most) the first five files: show its name, draw its plots,
# then print its data-quality verdict.
for f in filenames[0:5]:
    print(f)
    analyse(f)
    detect_problems(f)

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

help(detect_problems)

In [ ]:

"""some of our temperature files haave problems, check for these

this function reads a file and reports on odd looking maxima and minimia that add to zero
the function does not return any data
"""

def detect_problems(filename):
    """Check one temperature file for known data problems and print a verdict.

    The file is loaded with ``numpy.loadtxt``; the function then reports
    odd-looking maxima, or minima that add up to zero, based on the
    per-column maxima and minima (``axis=0``).

    Parameters
    ----------
    filename : str
        Path of the file to check. Assumed to hold comma-separated numeric
        values with at least 21 columns -- TODO confirm against the data files.

    Returns
    -------
    None
        The function does not return any data; the verdict is printed.
    """
    # Read the file named by the caller instead of relying on a global `data`.
    data = numpy.loadtxt(fname=filename, delimiter=',')
    column_maxima = numpy.max(data, axis=0)

    if column_maxima[0] == 0 and column_maxima[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        print('minima add up to zero')
    else:
        print('data looks ok')

In [ ]:

def analyse(filename):
    """Analyse one dataset and output plots of its maximum, minimum and average.

    The file is loaded with ``numpy.loadtxt`` and the per-column (``axis=0``)
    average, minimum and maximum are drawn side by side in a single wide figure.

    Parameters
    ----------
    filename : str
        Path of the file to plot. Assumed to hold comma-separated numeric
        values -- TODO confirm against the data files.

    Returns
    -------
    None
        The figure is displayed, not returned.
    """
    # Read the file named by the caller instead of relying on a global `data`.
    data = numpy.loadtxt(fname=filename, delimiter=',')

    # One wide figure holding three side-by-side axes (1 row x 3 columns).
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('minimum')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('maximum')
    subplot3.plot(numpy.max(data, axis=0))

    fig.tight_layout()
    # `show` must be called (with parentheses); a bare attribute reference does nothing.
    matplotlib.pyplot.show()

In [ ]:

