In [1]:
import pandas as pd
from numpy import random as rnd
import matplotlib.pyplot as plt
from IPython.display import display
In [2]:
# Pool of names to sample from.
names = ['Bob', 'Jessica', 'Mary', 'John', 'Mel']

# Fix the global NumPy seed so the same 1000 names are drawn on every run.
rnd.seed(100)

# One randint call per draw: each call yields an index in [0, len(names)).
random_names = [names[rnd.randint(low=0, high=len(names))] for _ in range(1000)]

# Preview the first ten sampled names.
random_names[:10]
Out[2]:
In [3]:
# Number of births: 1000 random integers in [0, 1000).
rnd.seed(300)  # separate fixed seed so the counts are reproducible
births = [
    rnd.randint(low=0, high=1000)
    for _ in range(1000)
]

# Preview the first ten counts.
births[:10]
Out[3]:
In [4]:
# Pack the two parallel lists into a list of (name, births) tuples.
BabyDataSet = [*zip(random_names, births)]

# Preview the first ten pairs.
BabyDataSet[:10]
Out[4]:
In [5]:
# Build the DataFrame: one row per (name, births) pair.
df = pd.DataFrame(
    data=BabyDataSet,
    columns=['Names', 'Births'],
)

# Show the first ten rows.
df.head(10)
Out[5]:
In [6]:
# Quick look at the first five rows (5 is the default for head()).
df.head(n=5)
Out[6]:
In [7]:
# Distinct values in the 'Names' column, in order of first appearance.
pd.unique(df['Names'])
Out[7]:
In [8]:
# Summary statistics for the 'Names' column.
df.loc[:, 'Names'].describe()
Out[8]:
In [9]:
# Grouping: collect the rows by name and add up the births in each group.
name = df.groupby(by='Names')

# NOTE: `df` is rebound here. From this point on it holds one row per name
# (indexed by 'Names') with the summed 'Births', not the raw rows.
df = name.sum()
df
Out[9]:
In [10]:
# Order the rows by 'Births', largest first; the top row is the maximum.
Sorted = df.sort_values(by='Births', ascending=False)
Sorted.head(n=1)
Out[10]:
In [11]:
# Bar chart of the 'Births' column, drawn on an explicit Axes so the figure
# can carry a title and axis labels and stand on its own when skimmed.
fig, ax = plt.subplots()
df['Births'].plot.bar(ax=ax)
ax.set(title='Total births by name', xlabel='Name', ylabel='Births')
plt.show()

print("The most popular name")
# Sorted in descending order of 'Births', so the first row is the most
# popular name; the remaining rows give the rest of the ranking.
df.sort_values(by='Births', ascending=False)
Out[11]:
In [ ]: