In [7]:
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Directory that holds the yearly 'yob<YEAR>.txt' input files.
# Written as a raw string: the backslashes are Windows path separators, and in
# a plain literal '\P' and '\d' are invalid escape sequences that Python warns
# about. The runtime value is unchanged.
# NOTE(review): machine-specific absolute path -- adjust before running on
# another machine.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
# Absolute, normalised form of the directory; per-file paths are joined onto it.
source_dir_path = os.path.normpath(os.path.abspath(source_path))
def count_consonant(name):
    """Return how many characters of ``name`` are Latin consonant letters.

    The check is case-insensitive, so the capitalised first letter of a name
    is counted as well. The letter 'y' is treated as a consonant; vowels
    (a, e, i, o, u) and any non-letter characters are ignored.

    Parameters
    ----------
    name : str
        The text to inspect.

    Returns
    -------
    int
        Number of consonant characters; 0 for an empty string.
    """
    consonants = frozenset('qwrtypsdfghjklzxcvbnm')
    # Lower-case first so 'M' in 'Mary' matches 'm' in the consonant set.
    return sum(1 for ch in name.lower() if ch in consonants)
# Not used by this cell; kept defined in case other cells reference them.
names_dict = {}
names_list = []

# Read every yearly file exactly once and keep the frames, instead of
# rebinding a single variable on each iteration (which would leave only the
# last year's data after the loop).
yearly_frames = []
for year in range(1900, 2001):
    source_file = os.path.join(source_dir_path, 'yob{}.txt'.format(year))
    # The files have no header row; the columns are named here and only the
    # two that are needed downstream are loaded.
    yearly_frames.append(
        pd.read_csv(
            source_file,
            names=['Name', 'Gender', 'Count'],
            usecols=['Name', 'Count'],
        )
    )

# One row per unique name, with 'Count' summed over all rows for that name
# (every year and gender), most frequent names first.
# The variable keeps the name 'year_data' because the plotting cell reads it.
year_data = (
    pd.concat(yearly_frames, ignore_index=True)
    .groupby('Name')[['Count']]
    .sum()
    .sort_values(by='Count', ascending=False)
)

# Count consonants once per unique name, after aggregation. Computing the
# column before the group-by would have it summed along with 'Count' and
# inflated for any name that occurs on more than one row.
year_data['consonants'] = [count_consonant(name) for name in year_data.index]

# Preview only the top rows rather than rendering the whole frame.
year_data.head(10)
Out[7]:
In [8]:
# Draw on an explicit Axes so the figure can carry a title; the axis labels
# default to the column names passed as x and y.
fig, ax = plt.subplots()
year_data.plot.scatter(x='consonants', y='Count', ax=ax)
ax.set_title('Count vs. consonants (one point per name)');
Out[8]:
In [ ]: