In [ ]:
import os
import pandas as pd
# Directory that holds the yearly ``yob<year>.txt`` files.
# Raw string: the Windows path contains backslashes, and sequences such as
# ``\P`` and ``\d`` are invalid escapes in a normal string literal (newer
# Python versions emit a warning for them).  The resulting value is unchanged.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
# Absolute, normalised form of the data directory.
source_dir_path = os.path.normpath(os.path.abspath(source_path))
In [1]:
# Default directory with the yearly ``yob<year>.txt`` files.  Raw string so the
# backslashes of the Windows path are never read as escape sequences.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'


def count_top3(arg_list, source_dir=None):
    """Return the three names with the highest total count over the given years.

    Parameters
    ----------
    arg_list : iterable
        Years to include; for every year the file ``yob<year>.txt`` is read
        as a header-less CSV with the columns ``Name``, ``Gender``, ``Count``.
    source_dir : str, optional
        Directory containing the files.  Defaults to the module-level
        ``source_path``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Name`` with a single ``Count`` column, sorted by count in
        descending order and limited to three rows.

    Raises
    ------
    FileNotFoundError
        If the file for one of the years does not exist.
    ValueError
        If ``arg_list`` is empty (``pd.concat`` has nothing to concatenate).
    """
    if source_dir is None:
        source_dir = source_path
    source_dir_path = os.path.normpath(os.path.abspath(source_dir))
    columns = ['Name', 'Gender', 'Count']

    frames = [
        pd.read_csv(
            os.path.join(source_dir_path, 'yob{}.txt'.format(year)),
            names=columns,
        )
        for year in arg_list
    ]
    names_all = pd.concat(frames, ignore_index=True)

    # Aggregate only ``Count``: summing the whole frame would also "sum" the
    # string ``Gender`` column (string concatenation on recent pandas).
    return (
        names_all.groupby('Name')[['Count']]
        .sum()
        .sort_values(by='Count', ascending=False)
        .head(3)
    )
# Show the three names with the highest combined count for 1900, 1950 and 2000.
count_top3([1900, 1950, 2000])
In [2]:
import os
In [3]:
# Default directory with the yearly ``yob<year>.txt`` files.  Raw string so the
# backslashes of the Windows path are never read as escape sequences.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'


def count_top3(arg_list, source_dir=None):
    """Return the three names with the highest total count over the given years.

    Parameters
    ----------
    arg_list : iterable
        Years to include; for every year the file ``yob<year>.txt`` is read
        as a header-less CSV with the columns ``Name``, ``Gender``, ``Count``.
    source_dir : str, optional
        Directory containing the files.  Defaults to the module-level
        ``source_path``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Name`` with a single ``Count`` column, sorted by count in
        descending order and limited to three rows.

    Raises
    ------
    FileNotFoundError
        If the file for one of the years does not exist.
    ValueError
        If ``arg_list`` is empty (``pd.concat`` has nothing to concatenate).
    """
    if source_dir is None:
        source_dir = source_path
    source_dir_path = os.path.normpath(os.path.abspath(source_dir))
    columns = ['Name', 'Gender', 'Count']

    frames = [
        pd.read_csv(
            os.path.join(source_dir_path, 'yob{}.txt'.format(year)),
            names=columns,
        )
        for year in arg_list
    ]
    names_all = pd.concat(frames, ignore_index=True)

    # Aggregate only ``Count``: summing the whole frame would also "sum" the
    # string ``Gender`` column (string concatenation on recent pandas).
    return (
        names_all.groupby('Name')[['Count']]
        .sum()
        .sort_values(by='Count', ascending=False)
        .head(3)
    )
In [4]:
# Show the three names with the highest combined count for 1900, 1950 and 2000.
count_top3([1900, 1950, 2000])
In [5]:
import pandas as pd
In [6]:
# Show the three names with the highest combined count for 1900, 1950 and 2000.
count_top3([1900, 1950, 2000])
Out[6]:
In [7]:
# Top three names for 1880 alone.  Left as the cell's last expression so the
# notebook renders the DataFrame with its rich display instead of plain text.
count_top3([1880])
In [8]:
def count_dynamics(arg_list, source_dir=None):
    """Collect the total count per gender for each of the given years.

    Parameters
    ----------
    arg_list : iterable
        Years to include; for every year the file ``yob<year>.txt`` is read
        as a header-less CSV with the columns ``Name``, ``Gender``, ``Count``.
    source_dir : str, optional
        Directory containing the files.  Defaults to the module-level
        ``source_path``.

    Returns
    -------
    dict
        ``{'F': [...], 'M': [...]}`` where each list holds one integer total
        per year, in the order of ``arg_list``.  A gender that does not occur
        in a year's file contributes ``0`` for that year.

    Raises
    ------
    FileNotFoundError
        If the file for one of the years does not exist.
    """
    if source_dir is None:
        source_dir = source_path
    source_dir_path = os.path.normpath(os.path.abspath(source_dir))
    columns = ['Name', 'Gender', 'Count']
    result_per_gender = {'F': [], 'M': []}

    for year in arg_list:
        source_file = os.path.join(source_dir_path, 'yob{}.txt'.format(year))
        # Sum only ``Count``; aggregating the whole frame would also
        # concatenate every ``Name`` string on recent pandas versions.
        totals = (
            pd.read_csv(source_file, names=columns)
            .groupby('Gender')['Count']
            .sum()
        )
        for gender in result_per_gender:
            # Label-based lookup (not positional ``[0]``) with a default of 0.
            result_per_gender[gender].append(int(totals.get(gender, 0)))

    return result_per_gender
In [9]:
# Per-gender ('F' / 'M') total counts for each of the years 1900, 1950 and 2000.
count_dynamics([1900, 1950, 2000])
Out[9]:
In [ ]: