In [1]:
import pandas as pd  # While Python itself lets you do all the ordinary database things, Pandas lets you do the extraordinary.

In [2]:
pd  # By just typing a package name, you can see whatever the developer wanted you to.


Out[2]:
<module 'pandas' from 'C:\\Users\\mikle\\Anaconda3\\lib\\site-packages\\pandas\\__init__.py'>

In [3]:
pd.DataFrame  # DataFrame's are the most common data type you'll be using from pandas. It's an Excel-like row & column table.


Out[3]:
pandas.core.frame.DataFrame

In [4]:
pd.DataFrame(['foo'])  # You CAN call a pandas DataFrame inputing merely a one-element list.


Out[4]:
0
0 foo

In [5]:
pd.DataFrame([('foo')])  # If it's going to support more table-like list-of-lists, it had better support nesting.


Out[5]:
0
0 foo

In [6]:
pd.DataFrame([('a', 'b', 'c'), ('d', 'e', 'f'), ('g', 'h', 'i')])  #  Okay, so let's feed it a hand-coded list of tuples.


Out[6]:
0 1 2
0 a b c
1 d e f
2 g h i

In [11]:
alist = list(range(1, 21))
list_of_tuples = [tuple(alist) for aline in alist]  # Create list_of_tuples from last lesson

In [12]:
pd.DataFrame(list_of_tuples)  # Now let's feed it the list_of_tuples we have waiting.


Out[12]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

In [13]:
df = pd.DataFrame(list_of_tuples)  # You can 0utput the data to an immediately Excel-readable CSV at any time.
df.to_csv('saved-data.csv')

In [14]:
df.to_csv('saved-data-noindex.csv', index=False)  # You can leave out the automatic pandas-supplied built-in index column.

In [15]:
df.describe()  # Because the data is now in Pandas, you can go straignt for some interesting statistics on the table.


Out[15]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
count 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0 20.0
mean 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0
std 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
min 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0
25% 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0
50% 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0
75% 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0
max 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0 11.0 12.0 13.0 14.0 15.0 16.0 17.0 18.0 19.0 20.0

In [17]:
reversed_list = list(reversed(alist))  # We're going to want to take control of our column names. Here's a start.
xlabels = ['x%s' % x for x in alist]
xlabels


Out[17]:
['x1',
 'x2',
 'x3',
 'x4',
 'x5',
 'x6',
 'x7',
 'x8',
 'x9',
 'x10',
 'x11',
 'x12',
 'x13',
 'x14',
 'x15',
 'x16',
 'x17',
 'x18',
 'x19',
 'x20']

In [18]:
pd.DataFrame([alist for x in alist], columns=xlabels, index=reversed_list)  # With 20-item list, we can set our column names.


Out[18]:
x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 x11 x12 x13 x14 x15 x16 x17 x18 x19 x20
20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

In [19]:
ord('A')  # But shouldn't we use Excel-like A1 column notation? Isn't there an ordinal function? Yes!


Out[19]:
65

In [20]:
alphabet_ordinals = list(range(ord('A'), ord('Z') + 1))  # With an ordinal function, you can always figure out A to Z.
for a_number in alphabet_ordinals:
    print("%s: %s" % (a_number, chr(a_number)))


65: A
66: B
67: C
68: D
69: E
70: F
71: G
72: H
73: I
74: J
75: K
76: L
77: M
78: N
79: O
80: P
81: Q
82: R
83: S
84: T
85: U
86: V
87: W
88: X
89: Y
90: Z

In [21]:
# However, this is where we just rely on Google and StackOverflow when we Google "Python A1 notation". Here's a function.

def a1_notation(n):
    string = ""
    while n > 0:
        n, remainder = divmod(n - 1, 26)
        string = chr(65 + remainder) + string
    return string

print(a1_notation(1))
print(a1_notation(26))
print(a1_notation(27))
print(a1_notation(702))


A
Z
AA
ZZ

In [22]:
A1_list = [a1_notation(x) for x in alist]  # We use our A1 noation function to create a list of the column labels we need.
A1_list


Out[22]:
['A',
 'B',
 'C',
 'D',
 'E',
 'F',
 'G',
 'H',
 'I',
 'J',
 'K',
 'L',
 'M',
 'N',
 'O',
 'P',
 'Q',
 'R',
 'S',
 'T']

In [23]:
pd.DataFrame([alist for x in alist], columns=A1_list)  # Now we can have a DataGrid that is always addressible like Excel.


Out[23]:
A B C D E F G H I J K L M N O P Q R S T
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20

In [24]:
alist = list(range(1, 31))
A1_list = [a1_notation(x) for x in alist]
df = pd.DataFrame([alist for aline in alist], columns=A1_list)  # Just to make sure, let's go past Z. Good!
df


Out[24]:
A B C D E F G H I J ... U V W X Y Z AA AB AC AD
0 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
1 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
2 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
3 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
4 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
5 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
6 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
7 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
8 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
9 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
10 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
11 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
12 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
13 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
14 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
15 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
16 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
17 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
18 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
19 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
20 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
21 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
22 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
23 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
24 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
25 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
26 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
27 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
28 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30
29 1 2 3 4 5 6 7 8 9 10 ... 21 22 23 24 25 26 27 28 29 30

30 rows × 30 columns


In [25]:
pd.set_option('display.max_columns', 500)  # We can change the Pandas display options to show all of our columns.
df


Out[25]:
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
21 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
23 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
24 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
25 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
26 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30