In [19]:
import numpy as np
from scipy import stats

In [4]:
import urllib.request as request

# Location of the raw text file with the pokemon names used in this notebook.
POKEMONS_URL = 'https://raw.githubusercontent.com/lidimayra/basic-stats/master/frequencies/pokemons.txt'

# Open the HTTP response; it is read and closed in the following cells.
file = request.urlopen(POKEMONS_URL)
print(file)


<http.client.HTTPResponse object at 0x7fa5734cf9b0>

In [5]:
# Pull the whole response body as bytes, then decode it into text.
raw_body = file.read()
pokemons = raw_body.decode('UTF-8')

In [9]:
# The body has already been read into `pokemons`, so release the HTTP response.
file.close()

In [10]:
# Show the downloaded text to eyeball the raw data before parsing it.
print(pokemons)


Pidgeotto
Pidgey
Pidgey
Pidgey
Pidgey
Pidgey
Poliwag
Rapidash
Rattata
Rattata
Sandshrew
Sandshrew

In [11]:
# split() with no arguments breaks on any run of whitespace (including
# newlines), producing a list with one entry per whitespace-separated token.
pokemons_list = pokemons.split()

In [12]:
# Spot-check a single parsed entry (index 6).
pokemons_list[6]


Out[12]:
'Poliwag'

In [13]:
# Spot-check a single parsed entry (index 7).
pokemons_list[7]


Out[13]:
'Rapidash'

In [14]:
# Spot-check a single parsed entry (index 8).
pokemons_list[8]


Out[14]:
'Rattata'

In [71]:
# stats.mode returns a (mode, count) pair; element 1 is how many times the
# most frequent entry occurs.
# NOTE(review): the recorded run emits a RuntimeWarning for this string input;
# confirm that the installed SciPy still accepts non-numeric data here.
mode_result = stats.mode(pokemons_list)
mode_result[1]


/home/lmayra/.pyenv/versions/3.4.3/lib/python3.4/site-packages/scipy/stats/stats.py:257: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.
  "values. nan values will be ignored.", RuntimeWarning)
Out[71]:
array([5])

In [72]:
# Build the frequency table: one row per distinct name, columns = [name, count].
# np.unique(..., return_counts=True) replaces the deprecated stats.itemfreq
# (removed from newer SciPy releases). Names come back sorted, and the counts
# are cast to str so the result is a single 2-D string array, the same layout
# itemfreq produced for string input.
names, counts = np.unique(pokemons_list, return_counts=True)
frequencies = np.column_stack((names, counts.astype(str)))
print(frequencies)


[['Pidgeotto' '1']
 ['Pidgey' '5']
 ['Poliwag' '1']
 ['Rapidash' '1']
 ['Rattata' '2']
 ['Sandshrew' '2']]

In [80]:
# Confirm which container type holds the frequency table before slicing it.
type(frequencies)


Out[80]:
numpy.ndarray

In [73]:
# Category labels: column 0 of every row of the frequency table.
# The row slice must be `:` (all rows); `0:0` selects zero rows and yields an
# empty array, which would leave the bar chart without any tick labels.
xi = frequencies[:, 0]
print(xi)


['Pidgeotto' 'Pidgey' 'Poliwag' 'Rapidash' 'Rattata' 'Sandshrew']

In [74]:
# Absolute frequencies: column 1 of every row of the frequency table.
# The values are still strings here because the table is a string array.
fi = frequencies[:, 1]
print(fi)


['1' '5' '1' '1' '2' '2']

In [75]:
# Convert the counts to integers so they can be used as bar heights.
fi = fi.astype(int)
print(fi)


[1 5 1 1 2 2]

In [76]:
%matplotlib notebook
import matplotlib.pyplot as plt

x_pos = np.arange(len(xi))
plt.figure(1)
plt.bar(x_pos, fi, align='center')
plt.ylim(0, max(fi) + 0.5)
plt.xticks(np.arange(len(xi)), xi)
plt.show()



In [ ]: