In [1]:
import requests
from bs4 import BeautifulSoup
import re

In [2]:
# Download the page that holds the raw "name:count" data.
# A timeout keeps the cell from hanging forever if the server never answers.
r = requests.get("http://pythonforengineers.com/reddit-raw-data/", timeout=30)
# Stop here on a 4xx/5xx response instead of silently parsing an error page.
r.raise_for_status()

data = r.text

# Name the parser explicitly: without it BeautifulSoup emits a warning and
# picks whichever parser happens to be installed, so results can vary
# from one machine to another.
soup = BeautifulSoup(data, "html.parser")

In [3]:
# Scan every <p> element of the parsed page for "word:digits" tokens.
# Raw string: "\w" and "\d" are regex escapes, not string escapes, and a
# non-raw literal triggers invalid-escape warnings on newer Pythons.
# Compiled once here rather than on every loop iteration.
count_pattern = re.compile(r"\w*:\d+")

# Stays None when no paragraph contains a match. If several paragraphs
# match, the matches from the LAST one win (earlier ones are overwritten).
data_found = None
for s in soup.find_all('p'):
    string_found = count_pattern.findall(s.text)
    if string_found:
        data_found = string_found

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(data_found)


[u'c_language:2975', u'cpp:25270', u'csharp:17401', u'objectivec:4039', u'd_language:1584', u'java:37226', u'smalltalk:797', u'golang:12353', u'scala:7264', u'groovy:1124', u'delphi:592', u'python:88347', u'ruby:26401', u'perl:8951', u'Tcl:519', u'lua:3391', u'php:33953', u'javascript:57747', u'fsharp:1719', u'haskell:18614', u'ocaml:2089', u'lisp:9517', u'scheme:3305', u'erlang:4047', u'matlab:6884', u'brainfuck:117', u'cobol:342', u'fortran:833', u'visualbasic:1822']

In [4]:
# Turn the "name:count" strings into a {name: int(count)} mapping.
# - `data_found or []` yields an empty dict instead of a TypeError when the
#   scan found nothing and data_found is still None.
# - A comprehension keeps its loop names local, so it no longer overwrites
#   the notebook-level `data` variable (the page HTML) as the old loop did.
# - Each entry is expected to contain exactly one ":" (the scan pattern only
#   allows word characters before it and digits after it).
data_dict = {
    name: int(count)
    for name, count in (entry.split(":") for entry in (data_found or []))
}

# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(data_dict)


{u'fsharp': 1719, u'golang': 12353, u'haskell': 18614, u'brainfuck': 117, u'csharp': 17401, u'smalltalk': 797, u'java': 37226, u'scala': 7264, u'delphi': 592, u'perl': 8951, u'lua': 3391, u'matlab': 6884, u'objectivec': 4039, u'scheme': 3305, u'python': 88347, u'javascript': 57747, u'php': 33953, u'ruby': 26401, u'groovy': 1124, u'erlang': 4047, u'visualbasic': 1822, u'lisp': 9517, u'ocaml': 2089, u'd_language': 1584, u'Tcl': 519, u'fortran': 833, u'cpp': 25270, u'cobol': 342, u'c_language': 2975}