In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Directory that holds the per-year name files ('yob<year>.txt').
# Written as a raw string: the Windows path contains backslashes ('\P', '\d')
# that a normal string literal would try to parse as escape sequences.
# NOTE(review): machine-specific absolute path; adjust it when running elsewhere.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
# Absolute, normalised form of the directory, used to build the file paths.
source_dir_path = os.path.normpath(os.path.abspath(source_path))


def download_year_data(year, names=('Ruth', 'Robert')):
    """Load one year's name file and return the total count per selected name.

    Reads 'yob<year>.txt' from ``source_dir_path`` as a header-less CSV whose
    columns are labelled 'Name', 'Gender' and 'Count', keeps only the rows
    whose name is in ``names`` and sums 'Count' per name (rows for the same
    name with different 'Gender' values are added together).

    Parameters
    ----------
    year : int or str
        Year substituted into the file name 'yob<year>.txt'.
    names : iterable of str, optional
        Names to keep. Defaults to ('Ruth', 'Robert').

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Name' with a single 'Count' column. A requested name that
        does not occur in the file has no row in the result.

    Raises
    ------
    FileNotFoundError
        If the file for ``year`` does not exist in ``source_dir_path``.
    """
    source_file = os.path.normpath(os.path.join(source_dir_path, 'yob{}.txt'.format(year)))
    year_data = pd.read_csv(source_file, names=['Name', 'Gender', 'Count'])
    # Keep only the requested names and drop 'Gender' so that it is not aggregated.
    selected = year_data.loc[year_data['Name'].isin(list(names)), ['Name', 'Count']]
    return selected.groupby('Name').sum()

# Inclusive range of years to load.
FIRST_YEAR = 1900
LAST_YEAR = 2000

# One small per-name totals frame for every year, keyed by the year.
names_dict = {
    year: download_year_data(year)
    for year in range(FIRST_YEAR, LAST_YEAR + 1)
}

# Stack the yearly frames into one; the dict keys become the outer index
# level, which is labelled 'Year'.
ruth_n_robert_all_time = pd.concat(names_dict, names=['Year'])




In [2]:
# Move the 'Name' index level into the columns: one row per year,
# one column per name.
ruth_n_robert_dynamics = ruth_n_robert_all_time.unstack(level='Name')
ruth_n_robert_dynamics


Out[2]:
Count
Name Robert Ruth
Year
1900 3845 4781
1901 2559 3989
1902 3201 4401
1903 3057 4531
1904 3427 4915
1905 3431 5088
1906 3651 5159
1907 4042 5588
1908 4245 6200
1909 4588 6526
1910 5638 7247
1911 6565 8032
1912 12901 11323
1913 15755 12641
1914 21268 15885
1915 28854 21926
1916 31862 23244
1917 35388 23617
1918 40798 25582
1919 41211 24628
1920 48886 26155
1921 53024 25840
1922 54033 23702
1923 56368 23699
1924 61100 23671
1925 61244 22326
1926 61497 20293
1927 62079 19504
1928 61126 17930
1929 60238 16084
... ... ...
1971 50955 1640
1972 43321 1383
1973 38957 1288
1974 37282 1305
1975 35580 1235
1976 34046 1209
1977 34460 1197
1978 33646 1219
1979 34275 1209
1980 34530 1262
1981 34624 1272
1982 34649 1193
1983 33000 1135
1984 31996 1174
1985 32325 1125
1986 31580 1096
1987 30447 1040
1988 30492 1012
1989 30190 997
1990 28976 913
1991 26905 990
1992 24147 853
1993 22514 862
1994 20606 879
1995 18624 856
1996 17709 801
1997 16339 856
1998 15552 850
1999 14563 867
2000 13758 902

101 rows × 2 columns


In [5]:
# Line chart of the yearly counts for both names.
ax = ruth_n_robert_dynamics.plot(title='Ruth vs Robert', grid=True)
# Label the value axis; the trailing ';' keeps the repr out of the cell output.
ax.set_ylabel('Count');


Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0xed24bfc50>

In [6]:
# Bar chart of the yearly counts for both names; a wider figure gives the
# 101 yearly tick labels more room.
ax = ruth_n_robert_dynamics.plot.bar(title='Ruth vs Robert', figsize=(20, 6))
# Label the value axis; the trailing ';' keeps the repr out of the cell output.
ax.set_ylabel('Count');


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0xed0133e48>

In [5]:
import matplotlib.pyplot as plt

In [6]:
# Line chart of the yearly counts for both names.
ax = ruth_n_robert_dynamics.plot(title='Ruth vs Robert', grid=True)
# Label the value axis; the trailing ';' keeps the repr out of the cell output.
ax.set_ylabel('Count');


Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0xe9efc0390>

In [4]:
%matplotlib inline

In [8]:
# Line chart of the yearly counts for both names.
ax = ruth_n_robert_dynamics.plot(title='Ruth vs Robert', grid=True)
# Label the value axis; the trailing ';' keeps the repr out of the cell output.
ax.set_ylabel('Count');


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0xe9f06a0f0>

In [9]:
# Bar chart of the yearly counts for both names; a wider figure gives the
# 101 yearly tick labels more room.
ax = ruth_n_robert_dynamics.plot.bar(title='Ruth vs Robert', figsize=(20, 6))
# Label the value axis; the trailing ';' keeps the repr out of the cell output.
ax.set_ylabel('Count');


Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0xe9ed749b0>

In [ ]: