In [1]:
import numpy
import matplotlib.pyplot
%matplotlib inline

In [2]:
data = numpy.loadtxt (fname='Data/weather-01.csv', delimiter = ',')

In [3]:
# create a wide figure to hold the sublots
fig = matplotlib.pyplot.figure (figsize=(10.0, 3.0))

# create placeholder for plots
subplot1 = fig.add_subplot(1,3,1)
subplot2 = fig.add_subplot(1,3,2)
subplot3 = fig.add_subplot(1,3,3)

subplot1.set_ylabel('average')
subplot1.plot(numpy.mean(data, axis=0))

subplot2.set_ylabel('max')
subplot2.plot(numpy.max(data, axis=0))

subplot3.set_ylabel('min')
subplot3.plot(numpy.min(data, axis=0))

fig.tight_layout()
# how far away the figures are- tight_layout is further apart than default
matplotlib.pyplot.show()

#adding a label to each subplot and setting the data to go in each subplot
#will create three graphs side by side


Loops


In [4]:
word = 'notebook'
print (word[4])


b

In [5]:
# for loops, char is counter variable
for char in word:
    print (char)


n
o
t
e
b
o
o
k

get a list of all the filenames from disk


In [6]:
import glob
# global file system library has only one function in it

In [7]:
print (glob.glob('data/weather*.csv'))
# all files start with weather and end with .csv * means there can be anything in between


['data\\weather-01.csv', 'data\\weather-02.csv', 'data\\weather-03.csv', 'data\\weather-04.csv', 'data\\weather-05.csv', 'data\\weather-06.csv', 'data\\weather-07.csv', 'data\\weather-08.csv', 'data\\weather-09.csv', 'data\\weather-10.csv', 'data\\weather-11.csv', 'data\\weather-12.csv']

Putting it all together


In [8]:
# new variable
filenames = sorted(glob.glob('data/weather*.csv'))

#overwrite 'filenames' with a different value to create a smaller slice 
#filenames = filenames[0:3]
#can add a '#' before code to not use it without deleting it

for f in filenames:
    print (f)
    #loop across individual file names in 'filenames' and indent lines below so that it is part of the loop
    
    data = numpy.loadtxt(fname=f, delimiter=',')
    
    if numpy.max (data, axis=0)[0] == 0 and numpy.max (data, axis=0)[20] == 20:
        print ('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) ==0:
        print ('minimum add to zero')
    else:
        print ('data looks ok')
    #if the first data point is o and the 20th one is 20 or if all the data is 0, hen there is something wrong otherwise carry on
    #shows message over graph
    
    # create a wide figure to hold the sublots
    fig = matplotlib.pyplot.figure (figsize=(10.0, 3.0))

    # create placeholder for plots
    subplot1 = fig.add_subplot(1,3,1)
    subplot2 = fig.add_subplot(1,3,2)
    subplot3 = fig.add_subplot(1,3,3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('max')
    subplot2.plot(numpy.max(data, axis=0))

    subplot3.set_ylabel('min')
    subplot3.plot(numpy.min(data, axis=0))

    fig.tight_layout()
    matplotlib.pyplot.show()


data\weather-01.csv
suspicious looking maxima
data\weather-02.csv
suspicious looking maxima
data\weather-03.csv
minimum add to zero
data\weather-04.csv
suspicious looking maxima
data\weather-05.csv
suspicious looking maxima
data\weather-06.csv
suspicious looking maxima
data\weather-07.csv
suspicious looking maxima
data\weather-08.csv
minimum add to zero
data\weather-09.csv
suspicious looking maxima
data\weather-10.csv
suspicious looking maxima
data\weather-11.csv
minimum add to zero
data\weather-12.csv
suspicious looking maxima

Making decisions


In [9]:
num = 107
if num > 100:
    print ('Greater')
else:
    print ("Not Greater")
    print ('Done')


Greater

In [10]:
num = -3

if num >0:
    print (num, "is positive")
elif num == 0:
    print (num, "is zero")
else:
    print (num, "is negative")
    
    #single = is to assign a value to the variable, double == is for matamaticaly equal, =? is not equal to
    #elif is short for else if, do another if test if the first one fails


-3 is negative

Functions


In [11]:
# functions start with def ie define
def fahr_to_kelvin(temp):
    return((temp-32)*(5/9)+273.15)
#doesn't do anything but this function is there to be used whenever

In [12]:
print ('freezing point of water:', fahr_to_kelvin(32))


freezing point of water: 273.15

In [13]:
print ('boiling point of water:', fahr_to_kelvin(212))


boiling point of water: 373.15

In [14]:
def fahr_to_cel(temp):
    return((temp-32)*(5/9))

In [15]:
print ('cross over point:', fahr_to_cel(-40))


cross over point: -40.0

In [16]:
def analyse(filename):
    """ Calculate the mean max and min and create three graphs within one plot
    """
    data = numpy.loadtxt(fname=filename, delimiter=',')
    # create a wide figure to hold the sublots
    fig = matplotlib.pyplot.figure(figsize=(10.0, 3.0))

    # create placeholder for plots
    subplot1 = fig.add_subplot(1, 3, 1)
    subplot2 = fig.add_subplot(1, 3, 2)
    subplot3 = fig.add_subplot(1, 3, 3)

    subplot1.set_ylabel('average')
    subplot1.plot(numpy.mean(data, axis=0))

    subplot2.set_ylabel('max')
    subplot2.plot(numpy.max(data, axis=0))

    subplot3.set_ylabel('min')
    subplot3.plot(numpy.min(data, axis=0))

    fig.tight_layout()
    matplotlib.pyplot.show()

In [17]:
def detect_problems(filename):
    """Reports on odd looking maxima and minima. The function does not return data it adds
    a label to the graph
    """
    data = numpy.loadtxt(fname=filename, delimiter=',')
    
    if numpy.max(data, axis=0)[0] == 0 and numpy.max (data, axis=0)[20] == 20:
        print('suspicious looking maxima')
    elif numpy.sum(numpy.min(data, axis=0)) ==0:
        print('minimum add to zero')
    else:
        print('data looks ok')
        #the string is a comment on the function to describe what it does. triple quote strings cover multiple lines

In [18]:
for f in filenames [0:5]:
    print (f)
    analyse (f)
    detect_problems (f)


data\weather-01.csv
suspicious looking maxima
data\weather-02.csv
suspicious looking maxima
data\weather-03.csv
minimum add to zero
data\weather-04.csv
suspicious looking maxima
data\weather-05.csv
suspicious looking maxima

In [19]:
help(numpy.loadtxt)


Help on function loadtxt in module numpy.lib.npyio:

loadtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0)
    Load data from a text file.
    
    Each row in the text file must have the same number of values.
    
    Parameters
    ----------
    fname : file or str
        File, filename, or generator to read.  If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings for Python 3k.
    dtype : data-type, optional
        Data-type of the resulting array; default: float.  If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array.  In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence, optional
        The characters or list of characters used to indicate the start of a
        comment;
        default: '#'.
    delimiter : str, optional
        The string used to separate values.  By default, this is any
        whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will convert
        that column to a float.  E.g., if column 0 is a date string:
        ``converters = {0: datestr2num}``.  Converters can also be used to
        provide a default value for missing data (but see also `genfromtxt`):
        ``converters = {3: lambda s: float(s.strip() or 0)}``.  Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines; default: 0.
    usecols : sequence, optional
        Which columns to read, with 0 being the first.  For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.  When used with a structured
        data-type, arrays are returned for each field.  Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.
    
        .. versionadded:: 1.6.0
    
    Returns
    -------
    out : ndarray
        Data read from the text file.
    
    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files
    
    Notes
    -----
    This function aims to be a fast reader for simply formatted files.  The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.
    
    .. versionadded:: 1.10.0
    
    The strings produced by the Python float.hex method can be used as
    input for floats.
    
    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[ 0.,  1.],
           [ 2.,  3.]])
    
    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([('M', 21, 72.0), ('F', 35, 58.0)],
          dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
    
    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([ 1.,  3.])
    >>> y
    array([ 2.,  4.])


In [20]:
help(detect_problems)


Help on function detect_problems in module __main__:

detect_problems(filename)
    Some of our temperatures have problems check for these
    Function reads file (filename) and reports on odd looking maxima and minima. the function does not return data it adds
    a label to the graph


In [21]:
# we want a description about what this function does, it is useful to have this to remember in the future

In [22]:
help(analyse)


Help on function analyse in module __main__:

analyse(filename)
    Analyse a set of data, calculate the mean max and min and create three graphs side by sid within one plot


In [ ]: