In [17]:
%matplotlib notebook
"""
This Python scripts retrieves the number of occurrances of each character in a file containing
10,000 of the most used passwords.
"""

import re
import matplotlib.pyplot as plt

def read_file():
    """
    Open the file containing the passwords
    """

    password_list = open('10k_passwords.txt', 'r')

    return password_list

PWD_LIST = read_file()

LETTERS = set()
i = 0

# Loop through the passwords and remove invalid characters
while i < 10000:
    PASSWORD = PWD_LIST.readline()
    for letter in PASSWORD:
        if re.match(r"\w", letter) and letter != "_":
            LETTERS.add(letter)
    i += 1

# Re-read the password file for new comparision
PASSWORDS = read_file().read()

LETTER_COUNT = dict()

for letter in LETTERS:
    LETTER_COUNT[letter] = PASSWORDS.count(letter)
    
plt.bar(range(len(LETTER_COUNT)), LETTER_COUNT.values(), align="center")
plt.xticks(range(len(LETTER_COUNT)), list(LETTER_COUNT.keys()))


Out[17]:
([<matplotlib.axis.XTick at 0x7fcd5157a5c0>,
  <matplotlib.axis.XTick at 0x7fcd51521a58>,
  <matplotlib.axis.XTick at 0x7fcd5129b518>,
  <matplotlib.axis.XTick at 0x7fcd511c5ef0>,
  <matplotlib.axis.XTick at 0x7fcd511cc940>,
  <matplotlib.axis.XTick at 0x7fcd511ce390>,
  <matplotlib.axis.XTick at 0x7fcd511ceda0>,
  <matplotlib.axis.XTick at 0x7fcd511d27f0>,
  <matplotlib.axis.XTick at 0x7fcd511d5240>,
  <matplotlib.axis.XTick at 0x7fcd511d5c50>,
  <matplotlib.axis.XTick at 0x7fcd511da6a0>,
  <matplotlib.axis.XTick at 0x7fcd511df0f0>,
  <matplotlib.axis.XTick at 0x7fcd511dfb00>,
  <matplotlib.axis.XTick at 0x7fcd511e1550>,
  <matplotlib.axis.XTick at 0x7fcd511e1f60>,
  <matplotlib.axis.XTick at 0x7fcd511e59b0>,
  <matplotlib.axis.XTick at 0x7fcd511e9400>,
  <matplotlib.axis.XTick at 0x7fcd511e9e10>,
  <matplotlib.axis.XTick at 0x7fcd511ee860>,
  <matplotlib.axis.XTick at 0x7fcd511f22b0>,
  <matplotlib.axis.XTick at 0x7fcd511f2cc0>,
  <matplotlib.axis.XTick at 0x7fcd511f6710>,
  <matplotlib.axis.XTick at 0x7fcd511fa160>,
  <matplotlib.axis.XTick at 0x7fcd511fab70>,
  <matplotlib.axis.XTick at 0x7fcd5117d5c0>,
  <matplotlib.axis.XTick at 0x7fcd5117dfd0>,
  <matplotlib.axis.XTick at 0x7fcd51182a20>,
  <matplotlib.axis.XTick at 0x7fcd51186470>,
  <matplotlib.axis.XTick at 0x7fcd51186e80>,
  <matplotlib.axis.XTick at 0x7fcd5118a8d0>,
  <matplotlib.axis.XTick at 0x7fcd5118e320>,
  <matplotlib.axis.XTick at 0x7fcd5118ed30>,
  <matplotlib.axis.XTick at 0x7fcd51191780>,
  <matplotlib.axis.XTick at 0x7fcd511961d0>,
  <matplotlib.axis.XTick at 0x7fcd51196be0>,
  <matplotlib.axis.XTick at 0x7fcd5119a630>],
 <a list of 36 Text xticklabel objects>)