NumPy is a Linear Algebra Library for Python.
NumPy’s main object is the homogeneous multidimensional array. It is a table of elements (usually numbers), all of the same type, indexed by a tuple of non-negative integers. In NumPy, dimensions are called axes, and the number of axes is the rank. For example, the coordinates of a point in 3D space, [1, 2, 1], form an array of rank 1, because it has one axis. That axis has a length of 3. In the example pictured below, the array has rank 2 (it is 2-dimensional).
NumPy is also incredibly fast, as it has bindings to C libraries.
To install NumPy:
sudo pip3 install numpy
In [1]:
import numpy as np
a = [1,2,3]
a
Out[1]:
In [2]:
b = np.array(a)
b
Out[2]:
In [3]:
np.arange(1, 10)
Out[3]:
In [4]:
np.arange(1, 10, 2)
Out[4]:
In [5]:
np.zeros(2, dtype=float)
Out[5]:
In [5]:
np.zeros((2,3))
Out[5]:
In [7]:
np.ones(3, )
Out[7]:
In [8]:
np.eye(3)
Out[8]:
In [9]:
np.linspace(1, 11, 3)
Out[9]:
In [10]:
np.random.rand(2)
Out[10]:
In [11]:
np.random.rand(2,3,4)
Out[11]:
In [12]:
np.random.randn(2,3)
Out[12]:
In [13]:
np.random.random()
Out[13]:
In [14]:
np.random.randint(1,50,10)
Out[14]:
In [15]:
np.random.randint(1,40)
Out[15]:
In [16]:
# Build a 3x4 array of zeros and show it together with its shape.
zero = np.zeros((3, 4))
# The label names the variable that is actually being inspected (`zero`).
print(zero, ' ', 'shape of zero :', zero.shape)
# Rearrange the same 12 elements as 2 rows of 6; reshape needs the element
# count to stay unchanged.
zero = zero.reshape((2, 6))
print()
print(zero)
In [17]:
# Two 2x2 integer matrices used to contrast the two kinds of product.
number = np.array([[1, 2], [3, 4]])
number2 = np.array([[1, 3], [2, 1]])
# Entry-by-entry product (what the `*` operator does on arrays).
print('element wise product :\n', np.multiply(number, number2))
# Row-by-column matrix product; number.dot(number2) is the method form.
print('matrix product :\n', np.dot(number, number2))
In [18]:
# Ten random integers, then their extremes (with positions) and their mean.
numbers = np.random.randint(low=1, high=100, size=10)
print(numbers)
# np.max / np.argmax etc. are the function forms of the array methods.
print('max is :', np.max(numbers))
print('index of max :', np.argmax(numbers))
print('min is :', np.min(numbers))
print('index of min :', np.argmin(numbers))
print('mean :', np.mean(numbers))
NumPy also has functions for mathematical operations such as exp, log, sqrt, abs, etc.
To find more functions, see the NumPy mathematical functions reference (https://numpy.org/doc/stable/reference/routines.math.html).
In [19]:
# 3x3 grid holding 1..9, used to show element-wise math functions.
number = np.arange(1, 10).reshape((3, 3))
print(number, end='\n\n')
# Both functions are applied to every element and return a same-shaped array.
print('exp:\n', np.exp(number), end='\n\n')
print('sqrt:\n', np.sqrt(number))
In [20]:
numbers.dtype
Out[20]:
In [21]:
# Plain assignment copies nothing: both names are bound to one array object.
number = number2 = np.arange(20)
print(number is number2, id(number), id(number2))
print(number)
# Changing the shape through one name is therefore visible through the other.
number2.shape = (4, 5)
print(number)
### Shallow copy
Different array objects can share the same data. The view method creates a new array object that looks at the same data.
In [22]:
# A view is a separate array object that looks at the same underlying data.
number = np.arange(20)
number2 = number.view()
# Expect False plus two different ids: the objects are distinct.
identity_report = (number is number2, id(number), id(number2))
print(*identity_report)
In [23]:
number2.shape = (5,4)
print('number2 shape:', number2.shape,'\nnumber shape:', number.shape)
In [24]:
print('befor:', number)
number2[0][0] = 2222
print()
print('after:', number)
### Deep copy
The copy method makes a complete copy of the array and its data.
In [25]:
# copy() duplicates both the array object and its data.
number = np.arange(20)
number2 = np.copy(number)
# Expect False plus two different ids: nothing is shared between the two.
print(number is number2, id(number), id(number2))
In [26]:
print('befor:', number)
number2[0] = 10
print()
print('after:', number)
print()
print('number2:',number2)
In [27]:
# Multiplying an array by a scalar applies the scalar to every element.
num = 2
number = np.arange(1, 11)
print(' number =', number)
print('\n number .* num =', np.multiply(number, num))
In [28]:
# A (3, 3) array times a (1, 3) row: the row is broadcast across all 3 rows.
number = np.arange(1, 10).reshape((3, 3))
number2 = np.arange(1, 4)[np.newaxis, :]
np.multiply(number, number2)
Out[28]:
In [29]:
# Adding a scalar to an array adds it to every element.
number = np.array((1, 2, 3))
print('number =', number)
print('\nnumber =', np.add(number, 100))
In [30]:
# A (3, 3) array plus a length-3 vector: the vector is added to each row.
number = np.arange(1, 10).reshape((3, 3))
number2 = np.arange(1, 4)
add = np.add(number, number2)
# Show both operands, then the broadcast sum, separated by blank lines.
print('number: \n', number, end='\n\n')
print('number2: \n ', number2, end='\n\n')
print('add: \n', add)
In [31]:
# `time` stays imported in case other cells use it; the measurements below use
# perf_counter, the clock intended for timing short intervals.
from time import perf_counter, time

# Benchmark: sum of squares of 8,000,000 random values, first with an explicit
# Python loop and then with one vectorized np.dot call.
a = np.random.rand(8000000, 1)

# Pure-Python loop: iterate the single column directly instead of indexing by
# position. Printing happens after the clock stops so only the arithmetic is
# measured.
c = 0
tic = perf_counter()
for value in a[:, 0]:
    c += value * value
tak = perf_counter()
print('output1:', c)
print('multiply 2 matrix with loop: ', tak - tic)

# Vectorized: (1, n) dot (n, 1) yields a 1x1 array holding the same sum.
tic = perf_counter()
product = np.dot(a.T, a)
tak = perf_counter()
print('output2:', product)
print('multiply 2 matrix with numpy func: ', tak - tic)
In [6]:
import pandas as pd
In [33]:
# Sample inputs for building Series: index labels, the same three values as a
# list and as an array, and a label -> value mapping.
labels = list('abc')
my_list = [10, 20, 30]
arr = np.array(my_list)
d = dict(zip(labels, my_list))
In [34]:
pd.Series(data=my_list)
Out[34]:
In [35]:
pd.Series(data=my_list,index=labels)
Out[35]:
In [36]:
pd.Series(d)
Out[36]:
In [7]:
# 5x4 frame of standard-normal samples. The third column must be 'C' (not 'V'):
# later cells select dataframe['C'] and would otherwise raise KeyError.
dataframe = pd.DataFrame(np.random.randn(5, 4), columns=['A', 'B', 'C', 'D'])
In [8]:
dataframe.head()
Out[8]:
In [9]:
dataframe['A']
Out[9]:
In [10]:
dataframe[['A', 'D']]
Out[10]:
In [11]:
dataframe['E'] = dataframe['A'] + dataframe['B']
In [12]:
dataframe
Out[12]:
In [14]:
dataframe.drop('E', axis=1)
Out[14]:
In [44]:
dataframe
Out[44]:
In [45]:
dataframe.drop('E', axis=1, inplace=True)
dataframe
Out[45]:
In [46]:
dataframe.loc[0]
Out[46]:
In [47]:
dataframe.iloc[0]
Out[47]:
In [48]:
dataframe.loc[0 , 'A']
Out[48]:
In [49]:
dataframe.loc[[0,2],['A', 'C']]
Out[49]:
In [50]:
dataframe > 0.3
Out[50]:
In [51]:
dataframe[dataframe > 0.3 ]
Out[51]:
In [52]:
dataframe[dataframe['A']>0.3]
Out[52]:
In [53]:
dataframe[dataframe['A']>0.3]['B']
Out[53]:
In [54]:
dataframe[(dataframe['A']>0.5) & (dataframe['C'] > 0)]
Out[54]:
In [12]:
# Two-level index: an outer group label paired with an inner number 1-3.
layer1 = ['g1', 'g1', 'g1', 'g2', 'g2', 'g2']
layer2 = [1, 2, 3, 1, 2, 3]
# Pair the levels position by position and build the MultiIndex in one step.
hier_index = pd.MultiIndex.from_tuples(list(zip(layer1, layer2)))
In [13]:
hier_index
Out[13]:
In [14]:
dataframe2 = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])
In [15]:
dataframe2
Out[15]:
In [58]:
dataframe2.loc['g1']
Out[58]:
In [59]:
dataframe2.loc['g1'].loc[1]
Out[59]:
In [60]:
titanic = pd.read_csv('Datasets/titanic.csv')
In [ ]:
# `pd.read` on its own is not a pandas attribute and raises AttributeError
# (it looks like a leftover from tab-completion). List the available reader
# functions (read_csv, read_json, ...) instead.
readers = [name for name in dir(pd) if name.startswith('read_')]
readers
In [61]:
titanic.head()
Out[61]:
In [62]:
titanic.drop('Name', axis=1 , inplace = True)
In [63]:
titanic.head()
Out[63]:
In [64]:
# Save the frame without its row index. `titanic` was read with the default
# integer index, so writing it would only add an extra unnamed column that
# reappears as 'Unnamed: 0' when the file is read back.
titanic.to_csv('Datasets/titanic_drop_names.csv', index=False)
CSV is one of the most important formats, but pandas is also compatible with many others, such as HTML tables, SQL, JSON, etc.
In [65]:
titanic.head()
Out[65]:
In [66]:
titanic.dropna()
Out[66]:
In [67]:
titanic.dropna(axis=1)
Out[67]:
In [68]:
titanic.fillna('Fill NaN').head()
Out[68]:
In [16]:
# Three 4-row frames with the same columns A-D and consecutive index ranges
# (0-3, 4-7, 8-11). Every cell is its column letter followed by its row label,
# e.g. 'B5' sits in column B at index 5.
df1 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(0, 4)] for col in 'ABCD'},
    index=list(range(0, 4)),
)
df2 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(4, 8)] for col in 'ABCD'},
    index=list(range(4, 8)),
)
df3 = pd.DataFrame(
    {col: [f'{col}{row}' for row in range(8, 12)] for col in 'ABCD'},
    index=list(range(8, 12)),
)
In [17]:
df1
Out[17]:
In [18]:
df2
Out[18]:
In [19]:
df3
Out[19]:
Concatenation
In [20]:
frames = [df1, df2, df3 ]
In [21]:
pd.concat(frames)
#pd.concat(frames, ignore_index=True)
Out[21]:
In [22]:
pd.concat(frames, axis=1)
Out[22]:
In [23]:
# Stack df2's rows under df1's. DataFrame.append was removed in pandas 2.0;
# pd.concat on a list of frames is the supported way to append rows.
pd.concat([df1, df2])
Out[23]:
Merging
In [77]:
# Two frames that share one 'key' column (K0-K3); `left` carries columns A and
# B, `right` carries columns C and D.
left = pd.DataFrame({
    'key': [f'K{i}' for i in range(4)],
    'A': [f'A{i}' for i in range(4)],
    'B': [f'B{i}' for i in range(4)],
})
right = pd.DataFrame({
    'key': [f'K{i}' for i in range(4)],
    'C': [f'C{i}' for i in range(4)],
    'D': [f'D{i}' for i in range(4)],
})
In [78]:
left
Out[78]:
In [79]:
right
Out[79]:
In [80]:
pd.merge(left, right, on= 'key')
Out[80]:
In [81]:
# Two frames keyed by the pair (key1, key2). The key pairs only partly overlap,
# so the different `how=` merge modes give different results.
left = pd.DataFrame(dict(
    key1=['K0', 'K0', 'K1', 'K2'],
    key2=['K0', 'K1', 'K0', 'K1'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))
right = pd.DataFrame(dict(
    key1=['K0', 'K1', 'K1', 'K2'],
    key2=['K0', 'K0', 'K0', 'K0'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))
In [82]:
pd.merge(left, right, on=['key1', 'key2'])
Out[82]:
In [83]:
pd.merge(left, right, how='outer', on=['key1', 'key2'])
Out[83]:
In [84]:
pd.merge(left, right, how='left', on=['key1', 'key2'])
Out[84]:
In [85]:
pd.merge(left, right, how='right', on=['key1', 'key2'])
Out[85]:
Joining
In [86]:
# Two frames labelled by string indexes that overlap only on K0 and K2; used
# to demonstrate joining on the index.
left = pd.DataFrame(
    dict(A=['A0', 'A1', 'A2'], B=['B0', 'B1', 'B2']),
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    dict(C=['C0', 'C2', 'C3'], D=['D0', 'D2', 'D3']),
    index=['K0', 'K2', 'K3'],
)
In [87]:
left
Out[87]:
In [88]:
right
Out[88]:
In [89]:
left.join(right)
Out[89]: