In [7]:
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

source_path = 'D:\Python_my\Python_Netology_homework\data_names'
source_dir_path = os.path.normpath(os.path.abspath(source_path))

def count_consonant(name):
    count = 0
    for ch in name:
        if ch in 'qwrtypsdfghjklzxcvbnm':
            count += 1
        else:
            pass
    return count

names_dict = {}
names_list = []
for i in range(1900, 2001):
    source_file = os.path.normpath(os.path.join(source_dir_path, 'yob{}.txt'.format(i)))
    year_data = pd.read_csv(source_file, names=['Name', 'Gender', 'Count'])
    year_data = year_data.drop(['Gender'], axis=1)
    year_data['consonants'] = year_data.apply(lambda row: count_consonant(row.Name), axis=1)
    year_data
year_data['consonants'] = year_data.apply(lambda row: count_consonant(row.Name), axis=1)
# year_data = year_data.drop(['Count'], axis=1)

year_data = year_data.groupby('Name').sum().sort_values(by='Count', ascending=False)
year_data


Out[7]:
Count consonants
Name
Jacob 34529 4
Michael 32147 6
Matthew 28616 8
Joshua 27590 4
Emily 25983 6
Christopher 24978 14
Nicholas 24689 8
Andrew 23680 8
Hannah 23103 6
Joseph 22847 6
Daniel 22373 4
Tyler 22185 6
Ryan 20712 4
William 20696 6
Brandon 20377 8
Alexis 20342 6
John 20114 4
Madison 20105 6
Zachary 19886 8
David 19805 4
Anthony 19699 10
Ashley 18078 8
James 18019 4
Jordan 17974 6
Taylor 17930 6
Justin 17826 6
Sarah 17717 4
Alexander 17345 10
Samantha 17286 8
Jonathan 16930 8
... ... ...
Damarko 5 3
Elajah 5 3
Marbin 5 3
Damarlo 5 3
Elahna 5 3
Eldric 5 4
Marasia 5 2
Keishauna 5 3
Keishawna 5 4
Angelice 5 4
Taeyon 5 2
Damarri 5 3
Elania 5 2
Keimon 5 2
Angeldaniel 5 6
Angelann 5 5
Keilin 5 2
Tahjay 5 3
Boubacar 5 3
Keilana 5 2
Bowden 5 3
Redmond 5 4
Keighley 5 4
Rechel 5 3
Rececca 5 3
Tahriq 5 3
Marcea 5 2
Marcedez 5 4
Rebekha 5 3
Shray 5 3

27509 rows × 2 columns


In [8]:
year_data.plot.scatter(x='consonants', y='Count')


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x75f7f06860>

In [ ]: