In [1]:
import pandas as pd
from numpy import random as rnd
import matplotlib.pyplot as plt
from IPython.display import display

In [2]:
# Pool of baby names to sample from
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']

# Seed the legacy global NumPy RNG so the same sample is drawn on every run
rnd.seed(100)

# 1000 names, each chosen by one randint draw over the valid indices of
# `names` (the upper bound passed to randint is exclusive)
random_names = [names[rnd.randint(0, len(names))] for _ in range(1000)]
random_names[:10]


Out[2]:
['Bob', 'Bob', 'John', 'Bob', 'Mary', 'Mel', 'Mary', 'Mary', 'Mary', 'Mary']

In [3]:
# Number of births: 1000 integers, one randint draw each from [0, 1000)
# (the upper bound is exclusive); seeded so the values are reproducible
rnd.seed(300)
births = [rnd.randint(0, 1000) for _ in range(1000)]

births[:10]


Out[3]:
[209, 481, 917, 714, 9, 725, 91, 103, 406, 505]

In [4]:
# Pack the two parallel lists into a list of (name, births) tuples
BabyDataSet = [(baby_name, count) for baby_name, count in zip(random_names, births)]
BabyDataSet[:10]


Out[4]:
[('Bob', 209),
 ('Bob', 481),
 ('John', 917),
 ('Bob', 714),
 ('Mary', 9),
 ('Mel', 725),
 ('Mary', 91),
 ('Mary', 103),
 ('Mary', 406),
 ('Mary', 505)]

In [5]:
# Build a DataFrame from the (name, births) tuples, one row per tuple
df = pd.DataFrame(BabyDataSet, columns=['Names', 'Births'])
df.iloc[:10]


Out[5]:
Names Births
0 Bob 209
1 Bob 481
2 John 917
3 Bob 714
4 Mary 9
5 Mel 725
6 Mary 91
7 Mary 103
8 Mary 406
9 Mary 505

In [6]:
# Preview the first five rows of the frame
df.head(n=5)


Out[6]:
Names Births
0 Bob 209
1 Bob 481
2 John 917
3 Bob 714
4 Mary 9

In [7]:
# Distinct values found in the 'Names' column
df.loc[:, 'Names'].unique()


Out[7]:
array(['Bob', 'John', 'Mary', 'Mel', 'Jessica'], dtype=object)

In [8]:
# Summary of the 'Names' column: row count, number of distinct values,
# the most frequent value and how often it occurs
df.loc[:, 'Names'].describe()


Out[8]:
count     1000
unique       5
top       John
freq       212
Name: Names, dtype: object

In [9]:
# Group the rows by name and total the births within each group.
# NOTE: `df` is rebound to the aggregated frame (indexed by name) here, so
# the row-level frame is no longer reachable through `df` after this cell.
name = df.groupby(by='Names')
df = name.sum()
df


Out[9]:
Births
Names
Bob 94634
Jessica 92289
John 105402
Mary 92007
Mel 106947

In [10]:
# Order the per-name totals from largest to smallest and show the top row
Sorted = df.sort_values(by='Births', ascending=False)
Sorted.iloc[:1]


Out[10]:
Births
Names
Mel 106947

In [11]:
# Bar chart of the total births per name. A title and a y-axis label are set
# so the figure can be understood on its own when the notebook is skimmed.
df['Births'].plot.bar(title='Total births by name').set_ylabel('Total births')
plt.show()

# Ranking of all names by total births, largest first; the first row of the
# displayed table is the most popular name.
print("The most popular name")
df.sort_values(by='Births', ascending=False)


The most popular name
Out[11]:
Births
Names
Mel 106947
John 105402
Bob 94634
Jessica 92289
Mary 92007

In [ ]: