In [1]:
import numpy as np
Provides
For an introduction to NumPy, read this Doc.
In [2]:
numpy_array = np.array([1, 2, 3.0])
ndarray = block of memory + indexing scheme + data type descriptor
- block of memory: the raw data
- indexing scheme: how to locate an element
- data type descriptor: how to interpret an element
Ref: Link
In [3]:
numpy_array # Typecast to float to maintain homogeneity
Out[3]:
In [4]:
numpy_array.dtype
Out[4]:
In [5]:
type(numpy_array)
Out[5]:
In [6]:
type(numpy_array[0])
Out[6]:
In [7]:
# ndarrays have a fixed size and no list-style .append() method, so this call
# raises AttributeError. Catch and show it so the demonstration does not halt
# a "Restart & Run All" of the notebook. (np.append(arr, values) exists, but
# it returns a new array instead of growing the original in place.)
try:
    numpy_array.append(9.0)
except AttributeError as error:
    print(f"AttributeError: {error}")
In [8]:
a = np.array([[1,2,3],[4,5,6]])
In [9]:
a
Out[9]:
In [10]:
a.shape
Out[10]:
In [11]:
a.strides
Out[11]:
The strides of an array tell us how many bytes we have to skip in memory to move to the next position along a certain axis. This Stack Overflow Q/A explains it quite well. Link
In [12]:
b = a.T # Transpose of a
In [13]:
b
Out[13]:
In [14]:
b.shape
Out[14]:
In [15]:
b.strides
Out[15]:
In [16]:
c = a.reshape(6) # Flatten into a 1-D array of 6 elements
In [17]:
c
Out[17]:
In [18]:
c.shape
Out[18]:
In [19]:
c.strides
Out[19]:
In [20]:
a.__array_interface__['data'][0] # integer memory address of a's data buffer
Out[20]:
In [21]:
b.__array_interface__['data'][0] # integer memory address of b's data buffer
Out[21]:
In [22]:
c.__array_interface__['data'][0] # integer memory address of c's data buffer
Out[22]:
In [23]:
# Read the data-buffer address of each array and check that all three match.
databuff_mem_loc_a, databuff_mem_loc_b, databuff_mem_loc_c = (
    arr.__array_interface__['data'][0] for arr in (a, b, c)
)
assert databuff_mem_loc_a == databuff_mem_loc_b == databuff_mem_loc_c
In [24]:
a = np.array([1, 2, 3])
b = np.array([2, 3, -1])
In [25]:
a * b # Common Mathematical operations are overridden for arrays
# Element wise multiplication. Use np.dot for matrix multiplication
Out[25]:
In [26]:
np.greater(a,b) # Comparison function
Out[26]:
In [27]:
np.logical_and(a>0, b>0)
Out[27]:
In [28]:
np.sum(a) # Aggregation function: takes a single array argument
Out[28]:
In [29]:
np.min(b)
Out[29]:
Thus, a ufunc is a “vectorized” wrapper for a function that takes a fixed number of specific inputs, and produces a fixed number of specific outputs. — NumPy docs.
Complete list of ufuncs: Link
In [30]:
import pandas as pd
import matplotlib.pyplot as plt
from time import time
%matplotlib inline
In [35]:
def return_time_taken(object_size):
    """Time element-wise squaring of ``object_size`` integers: list vs. NumPy.

    Parameters
    ----------
    object_size : int
        Number of elements (the integers ``0 .. object_size - 1``) placed in
        both the Python list and the NumPy array.

    Returns
    -------
    tuple of (float, float)
        ``(time_taken_lists, time_taken_np)`` in seconds: the time spent in
        the list comprehension and in the vectorized multiplication.
        Building the inputs is not included in either figure.
    """
    # perf_counter is the high-resolution monotonic clock intended for
    # benchmarking; time() has coarse resolution on some platforms and can
    # report 0.0 for the small object sizes measured here.
    from time import perf_counter

    python_list = list(range(object_size))
    start_ = perf_counter()
    _ = [element * element for element in python_list]
    end_ = perf_counter()
    del python_list
    del _
    time_taken_lists = end_ - start_

    numpy_array = np.arange(object_size)
    start_ = perf_counter()
    # Keep the result alive until the clock is stopped, exactly as the list
    # branch does, so neither timing includes freeing the result.
    _ = numpy_array * numpy_array
    end_ = perf_counter()
    del numpy_array
    del _
    time_taken_np = end_ - start_

    return (time_taken_lists, time_taken_np)
In [36]:
# Results table: C = object size, tL = list timing, tN = NumPy timing.
df = pd.DataFrame(columns=["C", "tL", "tN"])
In [37]:
# Benchmark object sizes 1, 10, 100, ..., 10**7.
# Rows are collected in a plain list and the DataFrame is built once at the
# end: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies the whole frame on every iteration.
records = []
object_size = 1
# "<" rather than "!=" so the loop still terminates if the start value or the
# multiplier is changed and the size never lands exactly on the bound.
while object_size < 100000000:
    time_taken_lists, time_taken_np = return_time_taken(object_size)
    records.append({"C": object_size, "tL": time_taken_lists, "tN": time_taken_np})
    object_size *= 10
df = pd.DataFrame(records, columns=["C", "tL", "tN"])
In [38]:
df
Out[38]:
In [39]:
# Plot the tL and tN columns against C on one Axes and label it through the
# Axes object returned by DataFrame.plot.
ax = df.plot(x="C", y=["tL", "tN"], figsize=(10, 5), grid=True)
ax.set_xlabel("Size of Object")
ax.set_ylabel("Time in(sec)")
ax.legend(["List looping", "Numpy Vectorization"])
ax.set_title("Input Object Size vs. Execution Time for *")
plt.show()
In [ ]: