In [10]:
import pandas as pd
# Directory prefix for the per-year files; the loader appends 'yob<year>.txt' to it.
# NOTE(review): this is an absolute, machine-specific path, so the files will not be
# found on another machine — consider a configurable, relative data directory.
PATH = '/Users/martysyuk/Documents/Python 3 Coding/Repositorys/PY-3-Learning/homeworks/names/'
# Years to load; one file per year.
YEARS = ['1900', '1950', '2000']
# Grouping keys used when aggregating counts.
MERGE_ON = ['Name', 'Gender']
# Column names passed to pd.read_csv for every file.
COLUMNS = ['Name', 'Gender', 'Count']
# Load the files into a dict keyed by year.
# NOTE(review): open_csv_files_by_year is defined further down in this file, so running
# the cells top-to-bottom on a fresh kernel would raise NameError here — confirm the
# intended cell order.
data = open_csv_files_by_year(YEARS)
In [2]:
def open_csv_files_by_year(*args):
    """Load the per-year name files into a dict of DataFrames keyed by year.

    The years may be given either as one iterable
    (``open_csv_files_by_year(['1900', '1950'])``) or as separate positional
    arguments (``open_csv_files_by_year('1900', '1950')``). When the first
    argument is an iterable, any further positional arguments are ignored,
    exactly as before.

    Each year is read from ``PATH + 'yob' + str(year) + '.txt'`` using the
    module-level ``COLUMNS`` as column names.

    Returns:
        dict: ``{year: DataFrame}`` for every year whose file could be opened.
        Empty when no years are supplied or none of the files exist.

    Missing files are still skipped (best effort), but a warning is emitted
    for each one instead of dropping the year silently.
    """
    import warnings

    # No arguments at all used to raise IndexError on args[0].
    if not args:
        return {}

    first = args[0]
    # A bare str/int first argument means the years were passed positionally;
    # iterating a str directly would walk its characters ('1', '9', '0', '0').
    years = args if isinstance(first, (str, int)) else first

    _data = {}
    for year in years:
        _file_name = PATH + 'yob' + str(year) + '.txt'
        try:
            _data[year] = pd.read_csv(_file_name, names=COLUMNS)
        except FileNotFoundError:
            warnings.warn(
                'Names file not found, skipping year %s: %s' % (year, _file_name)
            )
    return _data
In [3]:
def count_top3(_data_):
    """Return the three names with the largest total ``Count`` over all frames.

    Args:
        _data_: a dict (or list/tuple) of DataFrames that each carry the
            columns named in the module-level ``MERGE_ON`` plus ``Count``.

    Returns:
        list: up to three names, highest total first. Totals are computed per
        ``MERGE_ON`` group (name *and* gender), so a name used for both
        genders is ranked separately for each and may appear twice.
        An empty list is returned when ``_data_`` holds no frames.
    """
    # pd.concat raises ValueError on an empty container, which is what the
    # loader hands back when none of the year files were found.
    if isinstance(_data_, (dict, list, tuple)) and not _data_:
        return []

    names = pd.concat(_data_)
    totals = names.groupby(MERGE_ON, as_index=False).sum()
    top = totals.sort_values(by='Count', ascending=False).head(3)
    return list(top.Name)
In [5]:
def count_dynamics(_data_):
    """Sum ``Count`` per gender for every frame, preserving input order.

    Args:
        _data_: either a dict mapping a key (e.g. a year) to a DataFrame, or
            a plain iterable of DataFrames. Each frame needs ``Gender`` and
            ``Count`` columns.

    Returns:
        dict: ``{'M': [...], 'F': [...]}`` where position *i* of each list is
        the ``Count`` total for that gender in the *i*-th frame. Both lists
        are empty when no frames are supplied.
    """
    # Iterating a dict yields its keys (the year strings), not the frames, and
    # that made `year.Gender` fail; use the values when a dict is passed.
    frames = _data_.values() if isinstance(_data_, dict) else _data_

    male_by_years = []
    female_by_years = []
    for frame in frames:
        male_by_years.append(frame.loc[frame['Gender'] == 'M', 'Count'].sum())
        female_by_years.append(frame.loc[frame['Gender'] == 'F', 'Count'].sum())
    return {'M': male_by_years, 'F': female_by_years}
In [8]:
# Show the three highest-count names across all loaded years.
count_top3(data)
Out[8]:
In [9]:
# Show the per-gender Count totals for each loaded year.
count_dynamics(data)
Out[9]: