Analysing tabular data
We are going to use a library called NumPy.
In [4]:
import numpy
In [5]:
# Read the comma-separated weather file; being the last expression in the
# cell, the loaded array is echoed as the cell's output.
numpy.loadtxt('data/weather-01.csv', delimiter=',')
Out[5]:
In [6]:
# Starting weight, in kilograms.
weight_kg = 55
In [7]:
# Show the value currently bound to weight_kg.
print(weight_kg)
In [8]:
# Convert kilograms to pounds using a factor of 2.2.
print('Weight in pounds: ', 2.2 * weight_kg)
In [9]:
# Rebind the variable to a new weight, in kilograms.
weight_kg = 57.5
In [10]:
# Same kilograms-to-pounds conversion, applied to the updated weight.
print('New weight:', 2.2 * weight_kg)
In [11]:
%whos
In [12]:
# Load the comma-separated weather file and keep the array as `data`.
data = numpy.loadtxt('data/weather-01.csv', delimiter=',')
In [13]:
# Display the contents of the loaded array.
print(data)
In [14]:
# Display the Python type of the object that `data` refers to.
print(type(data))
In [15]:
%whos
In [16]:
# Find out the data type of the elements stored in the array.
print(data.dtype)
In [17]:
# Find out the shape of the array, reported as (rows, columns).
print(data.shape)
In [18]:
# this is 60 rows * 40 columns
In [19]:
# Get a single number out of the array: row 0, column 0.
print("First value in data:", data[0, 0])
In [20]:
# Index with [row, column] to read an element from inside the array.
print('A middle value:', data[30, 20])
In [21]:
# Get a slice out of the array: the first 10 columns of the first 4 rows.
# A slice starts at its lower index and goes up to, BUT NOT INCLUDING,
# its upper index, so 0:4 selects indices 0, 1, 2 and 3.
print(data[0:4, 0:10])
In [22]:
# A slice does not have to start at index 0.
print(data[5:10, 7:15])
In [23]:
# We don't need to include both the lower and the upper bound of a slice:
# a missing lower bound is automatically taken to be the beginning and a
# missing upper bound the end (in the example above, the explicit lower
# bounds were 5 and 7 respectively).
In [24]:
# Rows from the start up to (not including) index 3, and columns from
# index 36 through the end.
smallchunk = data[:3, 36:]
In [25]:
# Display the extracted sub-array.
print(smallchunk)
In [26]:
# Arithmetic on arrays: multiplying by a number scales every element.
doublesmallchunk = 2.0 * smallchunk
In [27]:
# Display the doubled sub-array.
print(doublesmallchunk)
In [26]:
# Adding two arrays combines them element by element.
triplesmallchunk = doublesmallchunk + smallchunk
In [27]:
# Display the sum of the original and doubled sub-arrays.
print(triplesmallchunk)
In [28]:
# Mean over every value in the array.
print(numpy.mean(data))
In [29]:
# Largest value anywhere in the array.
print(numpy.max(data))
In [30]:
# Smallest value anywhere in the array.
print(numpy.min(data))
In [31]:
# Do calculations down columns or across rows.
# Get the set of data for the first station.
station_0 = data[0, :]
# Row 0, with ':' selecting all of that row's columns.
In [32]:
# Display the first station's row. print is called as a function here, as in
# every other cell; the bare statement form is a SyntaxError on Python 3.
print(station_0)
In [33]:
# Largest value recorded in the first station's row.
print(numpy.max(station_0))
In [34]:
# We don't need to create 'temporary' array slices;
# we can refer to what we call array axes instead.
In [35]:
# Mean taken along axis 0, giving one value per column.
print(numpy.mean(data, axis=0))
In [36]:
# Mean taken along axis 1, giving one value per row.
print(numpy.mean(data, axis=1))
In [37]:
# axis=0 gives the mean down each column, so the mean temperature for each recording period
# axis=1 gives the mean across each row, so the mean temperature for each station over all the periods
In [38]:
#do some simple visualisations
In [28]:
import matplotlib.pyplot
In [29]:
%matplotlib inline
In [30]:
# Draw the whole array as an image and keep the returned object.
image = matplotlib.pyplot.imshow(data)
In [31]:
# Look at the average temperature over time: the mean down each column.
avg_temperature = numpy.mean(data, axis=0)
In [32]:
# Plot the average-temperature values as a line.
avg_plot = matplotlib.pyplot.plot(avg_temperature)
In [33]:
# Minimum taken along axis 0, giving one value per column.
min_temperature = numpy.min(data, axis=0)
In [34]:
import numpy
In [ ]:
In [35]:
# Plot the minimum-temperature values as a line.
min_plot = matplotlib.pyplot.plot(min_temperature)
In [36]:
# Maximum taken along axis 0, giving one value per column.
max_temperature = numpy.max(data, axis=0)
In [37]:
# Plot the maximum-temperature values as a line.
max_plot = matplotlib.pyplot.plot(max_temperature)
In [ ]: