In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Directory that holds the per-year "yob<year>.txt" files.
# Raw string: the Windows path contains backslashes ("\P", "\d") that a normal
# string literal would try to interpret as escape sequences.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
# Absolute, normalised form of the directory used when building file paths.
source_dir_path = os.path.normpath(os.path.abspath(source_path))


def download_year_data(year, source_dir=None, target_names=('Ruth', 'Robert')):
    """Load one year's name file and total the counts for the selected names.

    Reads ``yob<year>.txt`` as a header-less CSV with the columns
    Name, Gender and Count, keeps only the rows whose Name is one of
    ``target_names`` and sums Count per name, so rows for the same name
    that differ only by Gender are merged into one total.

    Parameters
    ----------
    year : int or str
        Value substituted into the ``yob{}.txt`` file name.
    source_dir : str, optional
        Directory containing the file. When omitted, the module-level
        ``source_dir_path`` is used.
    target_names : str or iterable of str, optional
        Names to keep. Defaults to Ruth and Robert.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Name`` with a single ``Count`` column. Names that do
        not occur in the file are simply absent from the result.
    """
    if source_dir is None:
        source_dir = source_dir_path
    # A bare string would otherwise be split into single characters below.
    if isinstance(target_names, str):
        target_names = [target_names]

    source_file = os.path.normpath(os.path.join(source_dir, 'yob{}.txt'.format(year)))
    year_data = pd.read_csv(source_file, names=['Name', 'Gender', 'Count'])

    selected = year_data[year_data['Name'].isin(list(target_names))]
    # Selecting [['Count']] keeps Gender out of the aggregation.
    return selected.groupby('Name')[['Count']].sum()

# One aggregated frame per sampled year: every fifth year from 1900 through 2000
# (the stop value of range() is exclusive, hence 2001).
names_dict = {year: download_year_data(year) for year in range(1900, 2001, 5)}

# Concatenating a dict uses its keys as an outer index level, named 'Year' here;
# each per-year frame contributes its own 'Name' index as the inner level.
ruth_n_robert_all_time = pd.concat(names_dict, names=['Year'])

In [3]:
# Move the 'Name' index level into the columns: one row per year, one column per name.
ruth_n_robert_dynamics = ruth_n_robert_all_time.unstack(level='Name')
ruth_n_robert_dynamics


Out[3]:
Count
Name Robert Ruth
Year
1900 3845 4781
1905 3431 5088
1910 5638 7247
1915 28854 21926
1920 48886 26155
1925 61244 22326
1930 62576 15010
1935 56750 10041
1940 61401 8922
1945 70130 8062
1950 83778 7138
1955 83918 6202
1960 72649 4960
1965 63392 3242
1970 57533 1829
1975 35580 1235
1980 34530 1262
1985 32325 1125
1990 28976 913
1995 18624 856
2000 13758 902

In [4]:
# Line chart of both names over the sampled years; the x axis is the 'Year' index.
ax = ruth_n_robert_dynamics.plot(title='Ruth vs Robert', grid=True)
# Trailing semicolon keeps the notebook from echoing the return value's repr.
ax.set_ylabel('Count');


Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x18b932a668>

In [5]:
# Grouped bar chart of the same data, one group of bars per sampled year.
ax = ruth_n_robert_dynamics.plot.bar(title='Ruth vs Robert')
# Trailing semicolon keeps the notebook from echoing the return value's repr.
ax.set_ylabel('Count');


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x18b9f198d0>

In [6]:
# NOTE(review): this cell repeats the line chart already drawn in cell In [4];
# consider removing one of the two.
ax = ruth_n_robert_dynamics.plot(title='Ruth vs Robert', grid=True)
# Trailing semicolon keeps the notebook from echoing the return value's repr.
ax.set_ylabel('Count');


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x18b9145320>

In [7]:
# NOTE(review): this cell repeats the bar chart already drawn in cell In [5];
# consider removing one of the two.
ax = ruth_n_robert_dynamics.plot.bar(title='Ruth vs Robert')
# Trailing semicolon keeps the notebook from echoing the return value's repr.
ax.set_ylabel('Count');


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x18b9cf50f0>

In [ ]: