In [ ]:
import os
import pandas as pd
# Directory holding the yearly `yob<year>.txt` name files.
# Raw string: the backslashes are Windows path separators, not escape sequences
# (`\P` and `\d` are invalid escapes and trigger warnings on newer Pythons).
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
# Absolute, normalized form of the same directory.
source_dir_path = os.path.normpath(os.path.abspath(source_path))

In [1]:
# Raw string so the Windows backslashes are never read as escape sequences.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
def count_top3(arg_list, source_dir=None):
    """Return the three names with the largest summed count over the given years.

    For every year in ``arg_list`` the headerless CSV file ``yob<year>.txt`` is
    read with the columns ``Name``, ``Gender`` and ``Count``. The counts are
    summed per name across all files (both genders together).

    Parameters
    ----------
    arg_list : iterable
        Years to load; each value is formatted into the file name.
    source_dir : str, optional
        Directory containing the files. Defaults to the module-level
        ``source_path``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Name`` with a single ``Count`` column, sorted by ``Count``
        in descending order and limited to three rows.

    Raises
    ------
    ValueError
        If ``arg_list`` is empty (``pd.concat`` has nothing to concatenate).
    """
    if source_dir is None:
        source_dir = source_path
    source_dir_path = os.path.normpath(os.path.abspath(source_dir))
    columns = ['Name', 'Gender', 'Count']
    frames = [
        pd.read_csv(os.path.join(source_dir_path, 'yob{}.txt'.format(year)), names=columns)
        for year in arg_list
    ]
    names_all = pd.concat(frames, ignore_index=True)

    # Aggregate only `Count`: summing the whole frame would also "sum" the
    # string `Gender` column on recent pandas versions.
    totals = names_all.groupby('Name')[['Count']].sum()
    return totals.sort_values(by='Count', ascending=False).head(3)
# Top three names by summed Count over the 1900, 1950 and 2000 files.
count_top3([1900, 1950, 2000])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-1-b9285970f73c> in <module>()
     11 
     12     return names_all.groupby('Name').sum().sort_values(by='Count', ascending=False).head(3)
---> 13 count_top3([1900, 1950, 2000])

<ipython-input-1-b9285970f73c> in count_top3(arg_list)
      2 def count_top3(arg_list):
      3     global source_path
----> 4     source_dir_path = os.path.normpath(os.path.abspath(source_path))
      5     columns = ['Name', 'Gender', 'Count']
      6     names = []

NameError: name 'os' is not defined

In [2]:
import os

In [3]:
# Raw string so the Windows backslashes are never read as escape sequences.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
def count_top3(arg_list, source_dir=None):
    """Return the three names with the largest summed count over the given years.

    For every year in ``arg_list`` the headerless CSV file ``yob<year>.txt`` is
    read with the columns ``Name``, ``Gender`` and ``Count``. The counts are
    summed per name across all files (both genders together).

    Parameters
    ----------
    arg_list : iterable
        Years to load; each value is formatted into the file name.
    source_dir : str, optional
        Directory containing the files. Defaults to the module-level
        ``source_path``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Name`` with a single ``Count`` column, sorted by ``Count``
        in descending order and limited to three rows.

    Raises
    ------
    ValueError
        If ``arg_list`` is empty (``pd.concat`` has nothing to concatenate).
    """
    if source_dir is None:
        source_dir = source_path
    source_dir_path = os.path.normpath(os.path.abspath(source_dir))
    columns = ['Name', 'Gender', 'Count']
    frames = [
        pd.read_csv(os.path.join(source_dir_path, 'yob{}.txt'.format(year)), names=columns)
        for year in arg_list
    ]
    names_all = pd.concat(frames, ignore_index=True)

    # Aggregate only `Count`: summing the whole frame would also "sum" the
    # string `Gender` column on recent pandas versions.
    totals = names_all.groupby('Name')[['Count']].sum()
    return totals.sort_values(by='Count', ascending=False).head(3)

In [4]:
# Top three names by summed Count over the 1900, 1950 and 2000 files.
count_top3([1900, 1950, 2000])


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-4-ee24af684670> in <module>()
----> 1 count_top3([1900, 1950, 2000])

<ipython-input-3-be02f585c779> in count_top3(arg_list)
      7     for year in arg_list:
      8         source_file = os.path.normpath(os.path.join(source_dir_path, 'yob' + str(year) + '.txt'))
----> 9         names.append(pd.read_csv(source_file, names=columns))
     10     names_all = pd.concat(names, names=['Year', 'Gender'])
     11 

NameError: name 'pd' is not defined

In [5]:
import pandas as pd

In [6]:
# Top three names by summed Count over the 1900, 1950 and 2000 files.
count_top3([1900, 1950, 2000])


Out[6]:
Count
Name
James 111795
John 109595
Robert 101381

In [7]:
# Single-year case: top three names by Count in the 1880 file, printed as plain text.
print(count_top3([1880]))


         Count
Name          
John      9701
William   9562
Mary      7092

In [8]:
def count_dynamics(arg_list, source_dir=None):
    """Return the total ``Count`` per gender for each requested year.

    For every year in ``arg_list`` the headerless CSV file ``yob<year>.txt`` is
    read with the columns ``Name``, ``Gender`` and ``Count`` and the counts are
    summed per gender.

    Parameters
    ----------
    arg_list : iterable
        Years to load; each value is formatted into the file name.
    source_dir : str, optional
        Directory containing the files. Defaults to the module-level
        ``source_path``.

    Returns
    -------
    dict
        ``{'F': [...], 'M': [...]}`` where each list holds one total per year,
        in the same order as ``arg_list``. A gender that does not occur in a
        file contributes ``0`` so the lists stay aligned with ``arg_list``.
    """
    if source_dir is None:
        source_dir = source_path
    source_dir_path = os.path.normpath(os.path.abspath(source_dir))
    columns = ['Name', 'Gender', 'Count']
    result_per_gender = {'F': [], 'M': []}
    for year in arg_list:
        source_file = os.path.join(source_dir_path, 'yob{}.txt'.format(year))
        # Sum only `Count`; aggregating the whole frame would also concatenate
        # every name string on recent pandas versions.
        totals = pd.read_csv(source_file, names=columns).groupby('Gender')['Count'].sum()
        for gender, yearly_totals in result_per_gender.items():
            # Label-based lookup instead of positional `[0]` on a label index.
            yearly_totals.append(totals.get(gender, 0))
    return result_per_gender

In [9]:
# Per-gender totals of the Count column for the 1900, 1950 and 2000 files.
count_dynamics([1900, 1950, 2000])


Out[9]:
{'F': [299810, 1713259, 1814922], 'M': [150486, 1790871, 1962744]}

In [ ]: