In [1]:
import numpy
import matplotlib.pyplot
%matplotlib inline

In [2]:
data = numpy.loadtxt (fname = 'data/weather-01.csv', delimiter = ',')

In [3]:
print(data)


[[ 0.  0.  1. ...,  3.  0.  0.]
 [ 0.  1.  2. ...,  1.  0.  1.]
 [ 0.  1.  1. ...,  2.  1.  1.]
 ..., 
 [ 0.  1.  1. ...,  1.  1.  1.]
 [ 0.  0.  0. ...,  0.  2.  0.]
 [ 0.  0.  1. ...,  1.  1.  0.]]
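
A quick way to check how much data came in is to look at the array's shape and type; a minimal sketch (these two lines are not in the original cells):

print(data.shape)   # (number of rows, number of columns)
print(data.dtype)   # loadtxt returns floating-point values by default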

In [4]:
# WARNING: put the following all in the same cell!

# create a wide figure to hold the subplots
fig = matplotlib.pyplot.figure (figsize=(10.0,3.0))

# create placeholders

subplot1 = fig.add_subplot (1,3,1)
subplot2 = fig.add_subplot (1,3,2)
subplot3 = fig.add_subplot (1,3,3)

subplot1.set_ylabel('average')
subplot1.plot(numpy.mean(data, axis=0))

subplot2.set_ylabel('min')
subplot2.plot(numpy.min(data, axis=0))

subplot3.set_ylabel('max')
subplot3.plot(numpy.max(data, axis=0))

fig.tight_layout()
matplotlib.pyplot.show()
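
As a reminder of what axis=0 means (a small sketch, not part of the original notebook): statistics with axis=0 are taken down each column, giving one value per column, while axis=1 works across each row.

column_means = numpy.mean(data, axis=0)  # one mean per column
row_means = numpy.mean(data, axis=1)     # one mean per row
print(column_means.shape, row_means.shape)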


Loops


In [5]:
# we can call the letters of a string one by one
word = 'notebook'
print (word[4])


b

In [6]:
# we can loop across every letter:
# INDENTATION IS KEY IN PYTHON!!!

for char in word:
    print (char)


n
o
t
e
b
o
o
k
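
If the position of each letter is also needed, the built-in enumerate gives it alongside the letter; a short sketch (not from the original cells):

for position, char in enumerate(word):
    print(position, char)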

Get a list of all the filenames from disk


In [7]:
import glob

In [8]:
print (glob.glob('data/weather*.csv')) # * is the wildcard


['data/weather-01.csv', 'data/weather-02.csv', 'data/weather-03.csv', 'data/weather-04.csv', 'data/weather-05.csv', 'data/weather-06.csv', 'data/weather-07.csv', 'data/weather-08.csv', 'data/weather-09.csv', 'data/weather-10.csv', 'data/weather-11.csv', 'data/weather-12.csv']

The above is a list of twelve strings; each string is the name of a file. We should now be able to produce a series of plots.
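
Lists can be sliced just like strings; a quick sketch (the same trick is used in the next cell) to keep only the first three filenames:

filenames = sorted(glob.glob('data/weather*.csv'))
print(len(filenames))    # 12 files
print(filenames[0:3])    # the first three only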

Putting it all together:


In [9]:
filenames = sorted(glob.glob('data/weather*.csv')) # sorted to make sure the files are in order (glob does not guarantee it)

filenames = filenames[0:3]  # overwrite the variable so we only plot a few files here

for f in filenames:  # loops across the three filenames
    print (f)
    
    data = numpy.loadtxt(fname = f, delimiter =',')

    # create a wide figure to hold the subplots
    fig = matplotlib.pyplot.figure (figsize=(10.0,3.0))
    
    # create placeholders
    subplot1 = fig.add_subplot (1,3,1)
    subplot2 = fig.add_subplot (1,3,2)
    subplot3 = fig.add_subplot (1,3,3)

    # plot average, min and max graphs
    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('min')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('max')
    subplot3.plot(numpy.max(data, axis=0))

    #show the graphs with a bit of space between them
    fig.tight_layout()
    matplotlib.pyplot.show()


data/weather-01.csv
data/weather-02.csv
data/weather-03.csv
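
If the plots should be kept rather than only displayed, a figure can be written to disk with fig.savefig instead of matplotlib.pyplot.show(); a hedged sketch (the .png filenames are made up here):

for f in filenames:
    data = numpy.loadtxt(fname=f, delimiter=',')
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))
    fig.add_subplot(1, 3, 1).plot(numpy.mean(data, axis=0))
    fig.add_subplot(1, 3, 2).plot(numpy.min(data, axis=0))
    fig.add_subplot(1, 3, 3).plot(numpy.max(data, axis=0))
    fig.tight_layout()
    fig.savefig(f.replace('.csv', '.png'))   # e.g. data/weather-01.png (hypothetical output name)
    matplotlib.pyplot.close(fig)             # close the figure instead of showing it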

Making decisions


In [10]:
num = 37

if num > 100:
    print ('Greater')
else:
    print ('Not greater')
    
print('Done')


Not greater
Done

In [11]:
num = -3

if num > 0:
    print(num, 'is positive')
elif num == 0:                 # double equal tests for equality (!= tests for not equal)
    print(num, 'is zero')
else:
    print(num, 'is negative')


-3 is negative
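
Conditions can also be combined with and / or; a short sketch (not from the original cells):

num = 37
if num > 0 and num < 100:
    print(num, 'is between 0 and 100')
if num < 0 or num > 100:
    print(num, 'is outside that range')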

something more on the plots


In [12]:
filenames = sorted(glob.glob('data/weather*.csv')) # sorted to make sure the files are in order (glob does not guarantee it)

# filenames = filenames[0:3]  # overwrite the variable to limit the data while testing;
                              # commented out after verifying everything works

for f in filenames:  # loops across all the filenames
    print (f)
    
    data = numpy.loadtxt(fname = f, delimiter =',')

# TEST ON DATA:
    
    if numpy.max(data, axis=0)[0] == 0 and numpy.max(data, axis=0)[20] == 20:
        print('Suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        print('Minima add up to zero')
    else:
        print('Data looks ok')
        
    
    # create a wide figure to hold the subplots
    fig = matplotlib.pyplot.figure (figsize=(10.0,3.0))
    
    # create placeholders
    subplot1 = fig.add_subplot (1,3,1)
    subplot2 = fig.add_subplot (1,3,2)
    subplot3 = fig.add_subplot (1,3,3)

    # plot average, min and max graphs
    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('min')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('max')
    subplot3.plot(numpy.max(data, axis=0))

    #show the graphs with a bit of space between them
    fig.tight_layout()
    matplotlib.pyplot.show()


data/weather-01.csv
Suspicious looking maxima
data/weather-02.csv
Suspicious looking maxima
data/weather-03.csv
Minima add up to zero
data/weather-04.csv
Suspicious looking maxima
data/weather-05.csv
Suspicious looking maxima
data/weather-06.csv
Suspicious looking maxima
data/weather-07.csv
Suspicious looking maxima
data/weather-08.csv
Minima add up to zero
data/weather-09.csv
Suspicious looking maxima
data/weather-10.csv
Suspicious looking maxima
data/weather-11.csv
Minima add up to zero
data/weather-12.csv
Suspicious looking maxima

Functions

Ways to create reusable chunks of code.


In [19]:
def fahr_to_kelvin(temp):
    return ((temp - 32)*(5.0/9.0)+273.15)

In [20]:
print('Freezing point of water in Kelvin: ', fahr_to_kelvin(32))
print('Boiling point of water in Kelvin: ', fahr_to_kelvin(200))


Freezing point of water in Kelvin:  273.15
Boiling point of water in Kelvin:  366.483333333
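
Functions can call other functions, so conversions can be built out of the ones we already have; a hedged sketch (kelvin_to_celsius and fahr_to_celsius are not defined in the original notebook):

def kelvin_to_celsius(temp_k):
    return temp_k - 273.15

def fahr_to_celsius(temp_f):
    # reuse fahr_to_kelvin rather than repeating the arithmetic
    return kelvin_to_celsius(fahr_to_kelvin(temp_f))

print('Freezing point of water in Celsius:', fahr_to_celsius(32.0))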

analysis function


In [23]:
def analyse(filename):
    """ Some of our temperature files have problems, this function checks for these.
    
        Good code is 50% about good comments. 
    
    """
    
    data = numpy.loadtxt(fname=filename, delimiter = ',')
    
    # create a wide figure to hold the subplots
    fig = matplotlib.pyplot.figure (figsize=(10.0,3.0))
    
    # create placeholders
    subplot1 = fig.add_subplot (1,3,1)
    subplot2 = fig.add_subplot (1,3,2)
    subplot3 = fig.add_subplot (1,3,3)

    # plot average, min and max graphs
    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('min')
    subplot2.plot(numpy.min(data, axis=0))

    subplot3.set_ylabel('max')
    subplot3.plot(numpy.max(data, axis=0))

    #show the graphs with a bit of space between them
    fig.tight_layout()
    matplotlib.pyplot.show()

In [16]:
def detect_problems (filename):
    data = numpy.loadtxt(fname=filename, delimiter = ',')
    
    
# TEST ON DATA:
    
    if numpy.max(data, axis=0)[0] == 0 and numpy.max(data, axis=0)[20] == 20:
        print('Suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        print('Minima add up to zero')
    else:
        print('Data looks ok')
    
    # this is a rough test (if the maxima look suspicious, the minima are never checked);
    # a better version could test both conditions and report every problem it finds (see the sketch below)
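
Following the comment above, a hedged sketch of a stricter checker that tests both conditions and reports every problem it finds (detect_problems_strict is a made-up name, not used elsewhere in this notebook):

def detect_problems_strict(filename):
    data = numpy.loadtxt(fname=filename, delimiter=',')
    maxima = numpy.max(data, axis=0)
    found_problem = False
    if maxima[0] == 0 and maxima[20] == 20:
        print('Suspicious looking maxima')
        found_problem = True
    if numpy.sum(numpy.min(data, axis=0)) == 0:
        print('Minima add up to zero')
        found_problem = True
    if not found_problem:
        print('Data looks ok')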

In [17]:
for f in filenames[0:5]:
    print(f)
    analyse(f)
    detect_problems(f)


data/weather-01.csv
Suspicious looking maxima
data/weather-02.csv
Suspicious looking maxima
data/weather-03.csv
Minima add up to zero
data/weather-04.csv
Suspicious looking maxima
data/weather-05.csv
Suspicious looking maxima

19th of January 2017


In [22]:
help(numpy.loadtxt)


Help on function loadtxt in module numpy.lib.npyio:

loadtxt(fname, dtype=<type 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0)
    Load data from a text file.
    
    Each row in the text file must have the same number of values.
    
    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence, optional
        The characters or list of characters used to indicate the start of a
        comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values.  By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float.  E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``.  Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.
    
        .. versionadded:: 1.6.0
    
    Returns
    -------
    out : ndarray
        Data read from the text file.
    
    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files
    
    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.
    
    .. versionadded:: 1.10.0
    
    The strings produced by the Python float.hex method can be used as
    input for floats.
    
    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])
    
    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
    
    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])


In [24]:
help (analyse)


Help on function analyse in module __main__:

analyse(filename)
    Some of our temperature files have problems; this function checks for these.
    
    Good code is 50% about good comments.
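
help() only shows what the docstring says, so detect_problems (written above without one) would give an empty description; a minimal sketch of documenting it the same way (the wording of the docstring is made up):

def detect_problems(filename):
    """ Check one temperature file for suspicious maxima or minima that add up to zero. """
    data = numpy.loadtxt(fname=filename, delimiter=',')
    if numpy.max(data, axis=0)[0] == 0 and numpy.max(data, axis=0)[20] == 20:
        print('Suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) == 0:
        print('Minima add up to zero')
    else:
        print('Data looks ok')

help(detect_problems)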


In [ ]: