In [12]:
import os
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Directory that contains the yob<year>.txt files.
# Written as a raw string: the Windows path contains backslash sequences
# (\P, \d) that are invalid escapes in a normal string literal and raise a
# DeprecationWarning / SyntaxWarning on current Python versions.
# The resulting string value is unchanged.
source_path = r'D:\Python_my\Python_Netology_homework\data_names'
source_dir_path = os.path.normpath(os.path.abspath(source_path))
source_file = os.path.normpath(os.path.join(source_dir_path, 'yob{}.txt'.format(1950)))

# Column names are supplied via `names=`, so every line of the file is read as data.
year_data = pd.read_csv(source_file, names=['Name', 'Gender', 'Count'])
# Only the Name and Count columns are kept from here on.
year_data = year_data.drop(['Gender'], axis=1)

# First ten rows, in file order, whose Name starts with 'R'.
# No sorting is applied in this cell.
top_10 = year_data[(year_data.Name.str.startswith('R'))].head(10)
top_10


Out[12]:
Name Count
35 Rebecca 9426
48 Ruth 7128
55 Rita 6425
68 Rose 5373
95 Roberta 3632
100 Rosemary 3457
124 Rhonda 2567
129 Ruby 2417
142 Regina 2166
148 Robin 1992

In [2]:
# Keep rows whose Count is above 3000 and whose Name begins with 'R',
# then take the first ten of them in their existing row order (no sorting).
is_frequent = year_data['Count'] > 3000
starts_with_r = year_data['Name'].str.startswith('R')
top_10 = year_data.loc[is_frequent & starts_with_r].head(10)
top_10


Out[2]:
Name Count
35 Rebecca 9426
48 Ruth 7128
55 Rita 6425
68 Rose 5373
95 Roberta 3632
100 Rosemary 3457
6114 Robert 83575
6119 Richard 51014
6124 Ronald 29917
6139 Roger 12551

In [3]:
# Same filter as before (Count > 3000, Name starts with 'R'), but the rows are
# ordered by Count, largest first, before the first ten are taken.
top_10 = (
    year_data
    .loc[lambda frame: (frame['Count'] > 3000) & frame['Name'].str.startswith('R')]
    .sort_values('Count', ascending=False)
    .head(10)
)
top_10


Out[3]:
Name Count
6114 Robert 83575
6119 Richard 51014
6124 Ronald 29917
6139 Roger 12551
6143 Raymond 11180
35 Rebecca 9426
48 Ruth 7128
6163 Roy 6459
55 Rita 6425
6164 Ralph 6406

In [13]:
# All rows whose Name starts with 'R'.
r_name = year_data[year_data['Name'].str.startswith('R')]

# Sum Count over all rows that share the same Name; the result is indexed by Name.
count_by_name = r_name.groupby('Name').sum()

# Ten names with the largest summed Count, in descending order.
top_10 = count_by_name.sort_values('Count', ascending=False).head(10)
top_10


Out[13]:
Count
Name
Robert 83778
Richard 51111
Ronald 29985
Roger 12578
Raymond 11218
Rebecca 9443
Ruth 7138
Roy 6491
Rita 6439
Ralph 6421

In [14]:
# Pie chart of the Count column of top_10, one slice per row.
ax = top_10.plot.pie(y='Count')
# Give the figure a title so it can be read on its own.
# The trailing ';' keeps the returned object's repr out of the cell output.
ax.set_title("Top 10 names starting with 'R'");


Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0xb7d2fc9d68>

In [6]:
import matplotlib.pyplot as plt

In [8]:
# Draws the pie chart of top_10's Count column.
# NOTE(review): this looks like a leftover re-run from getting inline plotting
# to work; it is probably safe to delete — confirm before removing.
top_10.plot(kind='pie', y='Count')


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0xb7d2df84a8>

In [10]:
%matplotlib inline

In [11]:
# Draws the pie chart of top_10's Count column and shows the returned Axes
# object as the cell result.
# NOTE(review): this looks like a leftover re-run from getting inline plotting
# to work; it is probably safe to delete — confirm before removing.
pie_axes = top_10.plot.pie(y='Count')
pie_axes


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0xb7d2ed82e8>

In [ ]: