notebook.community

Edit and run



In [ ]:



In [ ]:



In [2]:

    
# this is a BW Mod  
# 
from matplotlib import pyplot as plt

##########################
#                        #
# FINDING KEY CONNECTORS #
#                        #
##########################

users = [
    { "id": 0, "name": "Hero" },
    { "id": 1, "name": "Dunn" },
    { "id": 2, "name": "Sue" },
    { "id": 3, "name": "Chi" },
    { "id": 4, "name": "Thor" },
    { "id": 5, "name": "Clive" },
    { "id": 6, "name": "Hicks" },
    { "id": 7, "name": "Devin" },
    { "id": 8, "name": "Kate" },
    { "id": 9, "name": "Klein" },
    { "id": 10, "name": "Jen" }
]

friendships = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
               (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9)]



# first give each user an empty list
# it is a empty list, not an empty string, so we have a list of dictionaries, of which dict entry fiends is another list.
for user in users:
    user["friends"] = []
 
import copy
 
#print "users[0] : ", users[0]    
#users[0]["friends"].append(users[1])
#t={}
#print "t = ", t
#t=copy.deepcopy(users[0])
#del t["friends"]
#print "t = ", t
#users[0]["friends"].append(t)
#print users[0]["friends"]
#
#t=copy.deepcopy(users[2])
#del t["friends"]
#users[0]["friends"].append(t)    
#print users[0]["friends"]
#print "\n"
#print "users[0] : ", users[0]    
#print users[0]


# and then populate the lists with friendships
for i, j in friendships:
#    # this works because users[i] is the user whose id is i
    print("i=",i," j=",j )
# 2015/12/07 - replaced deepcopy by copy in the two t= assignments below (BW)    
    t=copy.copy(users[j])
    del t["friends"]
    users[i]["friends"].append(t)
#    
    t=copy.copy(users[i])
    del t["friends"]
    users[j]["friends"].append(t)
    
    print "users[", i, "] : ", users[i]  
#    users[i]["friends"].append(users[j]) # add i as a friend of j
#    users[j]["friends"].append(users[i]) # add j as a friend of i







# and then populate the lists with friendships
#for i, j in friendships:
#    # this works because users[i] is the user whose id is i
#    print("i=",i," j=",j )
#    users[i]["friends"].append(users[j]) # add i as a friend of j
#    users[j]["friends"].append(users[i]) # add j as a friend of i
    
#print(" bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce ")
#print('users[0])["id"]          =>' , str(users[0]["id"])) 
#print('users[0])["name"]        =>' , str(users[0]["name"]))
#print('users[0])["friends"][1]  =>' , str(users[0]["friends"][1]) )
#print('users[0])["friends"]     =>' , users[0]["friends"]     ) 
#print(" bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce ")









    



('i=', 0, ' j=', 1)
users[ 0 ] :  {'friends': [{'id': 1, 'name': 'Dunn'}], 'id': 0, 'name': 'Hero'}
('i=', 0, ' j=', 2)
users[ 0 ] :  {'friends': [{'id': 1, 'name': 'Dunn'}, {'id': 2, 'name': 'Sue'}], 'id': 0, 'name': 'Hero'}
('i=', 1, ' j=', 2)
users[ 1 ] :  {'friends': [{'id': 0, 'name': 'Hero'}, {'id': 2, 'name': 'Sue'}], 'id': 1, 'name': 'Dunn'}
('i=', 1, ' j=', 3)
users[ 1 ] :  {'friends': [{'id': 0, 'name': 'Hero'}, {'id': 2, 'name': 'Sue'}, {'id': 3, 'name': 'Chi'}], 'id': 1, 'name': 'Dunn'}
('i=', 2, ' j=', 3)
users[ 2 ] :  {'friends': [{'id': 0, 'name': 'Hero'}, {'id': 1, 'name': 'Dunn'}, {'id': 3, 'name': 'Chi'}], 'id': 2, 'name': 'Sue'}
('i=', 3, ' j=', 4)
users[ 3 ] :  {'friends': [{'id': 1, 'name': 'Dunn'}, {'id': 2, 'name': 'Sue'}, {'id': 4, 'name': 'Thor'}], 'id': 3, 'name': 'Chi'}
('i=', 4, ' j=', 5)
users[ 4 ] :  {'friends': [{'id': 3, 'name': 'Chi'}, {'id': 5, 'name': 'Clive'}], 'id': 4, 'name': 'Thor'}
('i=', 5, ' j=', 6)
users[ 5 ] :  {'friends': [{'id': 4, 'name': 'Thor'}, {'id': 6, 'name': 'Hicks'}], 'id': 5, 'name': 'Clive'}
('i=', 5, ' j=', 7)
users[ 5 ] :  {'friends': [{'id': 4, 'name': 'Thor'}, {'id': 6, 'name': 'Hicks'}, {'id': 7, 'name': 'Devin'}], 'id': 5, 'name': 'Clive'}
('i=', 6, ' j=', 8)
users[ 6 ] :  {'friends': [{'id': 5, 'name': 'Clive'}, {'id': 8, 'name': 'Kate'}], 'id': 6, 'name': 'Hicks'}
('i=', 7, ' j=', 8)
users[ 7 ] :  {'friends': [{'id': 5, 'name': 'Clive'}, {'id': 8, 'name': 'Kate'}], 'id': 7, 'name': 'Devin'}
('i=', 8, ' j=', 9)
users[ 8 ] :  {'friends': [{'id': 6, 'name': 'Hicks'}, {'id': 7, 'name': 'Devin'}, {'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}



In [ ]:

    
#aList = [123, 'xyz', 'zara', 'abc'];
#bList=["wombat"]
#print "UN Updated List : ", aList
#print "UN Updated List, second entry  : ", aList[3]


#users[0]["friends"].append(aList)

#print(' UN Updated users[0]["friends"] list : ', str(users[0]["friends"]) )

#users[0]["friends"].append(bList)

#aList.append( 2009 );
#print "Updated List : ", aList

#users[0]["friends"].append(aList)
#print(' Updated users[0]["friends"] list : ', str(users[0]["friends"]) ) 

#cList=users[0]["friends"]
#print "clist --> ", cList

#aList.append("toe rag")
#bList.append("toe rag")
#print(' Updated users[0]["friends"] list : ', str(users[0]["friends"]) ) 
#print "clist --> ", cList



In [ ]:



In [3]:

    
# at this stage in the book we haven't actually installed matplotlib,
# comment this out if you need to
from matplotlib import pyplot as plt

##########################
#                        #
# FINDING KEY CONNECTORS #
#                        #
##########################

users = [
    { "id": 0, "name": "Hero" },
    { "id": 1, "name": "Dunn" },
    { "id": 2, "name": "Sue" },
    { "id": 3, "name": "Chi" },
    { "id": 4, "name": "Thor" },
    { "id": 5, "name": "Clive" },
    { "id": 6, "name": "Hicks" },
    { "id": 7, "name": "Devin" },
    { "id": 8, "name": "Kate" },
    { "id": 9, "name": "Klein" },
    { "id": 10, "name": "Jen" }
]

friendships = [(0, 1), (0, 2), (1, 2), (1, 3), (2, 3), (3, 4),
               (4, 5), (5, 6), (5, 7), (6, 8), (7, 8), (8, 9)]



# first give each user an empty list
# it is a empty list, not an empty string, so we have a list of dictionaries, of which dict entry fiends is another list.
for user in users:
    user["friends"] = []

    

 
    


# and then populate the lists with friendships
for i, j in friendships:
    # this works because users[i] is the user whose id is i
    print("i=",i," j=",j )
    users[i]["friends"].append(users[j]) # add i as a friend of j
    users[j]["friends"].append(users[i]) # add j as a friend of i
    
print(" bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce ")
print('users[0])["id"]          =>' , str(users[0]["id"])) 
print('users[0])["name"]        =>' , str(users[0]["name"]))
#print('users[0])["friends"][1]  =>' , str(users[0]["friends"][1]) )
print('users[0])["friends"]     =>' , str(users[0]["friends"] )    ) 
print(" bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce ")
    
    

def number_of_friends(user):
    """how many friends does _user_ have?"""
    return len(user["friends"]) # length of friend_ids list

total_connections = sum(number_of_friends(user)
                        for user in users) # 24

num_users = len(users)
avg_connections = total_connections / num_users # 2.4

################################
#                              #
# DATA SCIENTISTS YOU MAY KNOW #
#                              #
################################

def friends_of_friend_ids_bad(user):
    # "foaf" is short for "friend of a friend"
    return [foaf["id"]
            for friend in user["friends"] # for each of user's friends
            for foaf in friend["friends"]] # get each of _their_ friends

from collections import Counter # not loaded by default

def not_the_same(user, other_user):
    """two users are not the same if they have different ids"""
    return user["id"] != other_user["id"]

def not_friends(user, other_user):
    """other_user is not a friend if he's not in user["friends"];
    that is, if he's not_the_same as all the people in user["friends"]"""
    return all(not_the_same(friend, other_user)
               for friend in user["friends"])

def friends_of_friend_ids(user):
    return Counter(foaf["id"]
                   for friend in user["friends"]  # for each of my friends
                   for foaf in friend["friends"]  # count *their* friends
                   if not_the_same(user, foaf)    # who aren't me
                   and not_friends(user, foaf))   # and aren't my friends

print(friends_of_friend_ids(users[3])) # Counter({0: 2, 5: 1})

interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"), (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"), (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"), (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"), (6, "statistics"),
    (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"), (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"), (9, "Hadoop"),
    (9, "Java"), (9, "MapReduce"), (9, "Big Data")
]

def data_scientists_who_like(target_interest):
    return [user_id
            for user_id, user_interest in interests
            if user_interest == target_interest]

from collections import defaultdict

# keys are interests, values are lists of user_ids with that interest
user_ids_by_interest = defaultdict(list)

for user_id, interest in interests:
    user_ids_by_interest[interest].append(user_id)

# keys are user_ids, values are lists of interests for that user_id
interests_by_user_id = defaultdict(list)

for user_id, interest in interests:
    interests_by_user_id[user_id].append(interest)

def most_common_interests_with(user_id):
    return Counter(interested_user_id
        for interest in interests_by_user["user_id"]
        for interested_user_id in users_by_interest[interest]
        if interested_user_id != user_id)

###########################
#                         #
# SALARIES AND EXPERIENCE #
#                         #
###########################

salaries_and_tenures = [(83000, 8.7), (88000, 8.1),
                        (48000, 0.7), (76000, 6),
                        (69000, 6.5), (76000, 7.5),
                        (60000, 2.5), (83000, 10),
                        (48000, 1.9), (63000, 4.2)]

def make_chart_salaries_by_tenure():
    tenures = [tenure for salary, tenure in salaries_and_tenures]
    salaries = [salary for salary, tenure in salaries_and_tenures]
    plt.scatter(tenures, salaries)
    plt.xlabel("Years Experience")
    plt.ylabel("Salary")
    plt.show()

# keys are years
# values are the salaries for each tenure
salary_by_tenure = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    salary_by_tenure[tenure].append(salary)

average_salary_by_tenure = {
    tenure : sum(salaries) / len(salaries)
    for tenure, salaries in salary_by_tenure.items()
}

def tenure_bucket(tenure):
    if tenure < 2: return "less than two"
    elif tenure < 5: return "between two and five"
    else: return "more than five"

salary_by_tenure_bucket = defaultdict(list)

for salary, tenure in salaries_and_tenures:
    bucket = tenure_bucket(tenure)
    salary_by_tenure_bucket[bucket].append(salary)

average_salary_by_bucket = {
  tenure_bucket : sum(salaries) / len(salaries)
  for tenure_bucket, salaries in salary_by_tenure_bucket.items()
}


#################
#               #
# PAID_ACCOUNTS #
#               #
#################

def predict_paid_or_unpaid(years_experience):
  if years_experience < 3.0: return "paid"
  elif years_experience < 8.5: return "unpaid"
  else: return "paid"

######################
#                    #
# TOPICS OF INTEREST #
#                    #
######################

words_and_counts = Counter(word
                           for user, interest in interests
                           for word in interest.lower().split())


if __name__ == "__main__":

    print()
    print("######################")
    print("#")
    print("# FINDING KEY CONNECTORS")
    print("#")
    print("######################")
    print()


    print("total connections", total_connections)
    print("number of users", num_users)
    print("average connections", total_connections / num_users)
    print()

    # create a list (user_id, number_of_friends)
    num_friends_by_id = [(user["id"], number_of_friends(user))
                         for user in users]

    print("users sorted by number of friends:")
    print(sorted(num_friends_by_id,
                 key=lambda pair: pair[1],                       # by number of friends
                 reverse=True))                                  # largest to smallest

    print()
    print("######################")
    print("#")
    print("# DATA SCIENTISTS YOU MAY KNOW")
    print("#")
    print("######################")
    print()


    print("friends of friends bad for user 0:", friends_of_friend_ids_bad(users[0]))
    print("friends of friends for user 3:", friends_of_friend_ids(users[3]))

    print()
    print("######################")
    print("#")
    print("# SALARIES AND TENURES")
    print("#")
    print("######################")
    print()

    print("average salary by tenure", average_salary_by_tenure)
    print("average salary by tenure bucket", average_salary_by_bucket)

    print()
    print("######################")
    print("#")
    print("# MOST COMMON WORDS")
    print("#")
    print("######################")
    print()

    for word, count in words_and_counts.most_common():
        if count > 1:
            print(word, count)









    



('i=', 0, ' j=', 1)
('i=', 0, ' j=', 2)
('i=', 1, ' j=', 2)
('i=', 1, ' j=', 3)
('i=', 2, ' j=', 3)
('i=', 3, ' j=', 4)
('i=', 4, ' j=', 5)
('i=', 5, ' j=', 6)
('i=', 5, ' j=', 7)
('i=', 6, ' j=', 8)
('i=', 7, ' j=', 8)
('i=', 8, ' j=', 9)
 bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce 
('users[0])["id"]          =>', '0')
('users[0])["name"]        =>', 'Hero')
('users[0])["friends"]     =>', "[{'friends': [{'friends': [...], 'id': 0, 'name': 'Hero'}, {'friends': [{'friends': [...], 'id': 0, 'name': 'Hero'}, {...}, {'friends': [{...}, {...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {...}], 'id': 7, 'name': 'Devin'}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 6, 'name': 'Hicks'}, {'friends': [{...}, {'friends': [{'friends': [{...}, {...}], 'id': 6, 'name': 'Hicks'}, {...}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 7, 'name': 'Devin'}], 'id': 5, 'name': 'Clive'}], 'id': 4, 'name': 'Thor'}], 'id': 3, 'name': 'Chi'}], 'id': 2, 'name': 'Sue'}, {'friends': [{...}, {'friends': [{'friends': [...], 'id': 0, 'name': 'Hero'}, {...}, {...}], 'id': 2, 'name': 'Sue'}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {...}], 'id': 7, 'name': 'Devin'}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 6, 'name': 'Hicks'}, {'friends': [{...}, {'friends': [{'friends': [{...}, {...}], 'id': 6, 'name': 'Hicks'}, {...}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 7, 'name': 'Devin'}], 'id': 5, 'name': 'Clive'}], 'id': 4, 'name': 'Thor'}], 'id': 3, 'name': 'Chi'}], 'id': 1, 'name': 'Dunn'}, {'friends': [{'friends': [...], 'id': 0, 'name': 'Hero'}, {'friends': [{'friends': [...], 'id': 0, 'name': 'Hero'}, {...}, {'friends': [{...}, {...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {...}], 'id': 7, 'name': 'Devin'}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 6, 'name': 'Hicks'}, {'friends': [{...}, {'friends': [{'friends': [{...}, {...}], 'id': 6, 'name': 'Hicks'}, {...}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 7, 'name': 'Devin'}], 'id': 5, 'name': 'Clive'}], 'id': 4, 'name': 'Thor'}], 'id': 3, 'name': 'Chi'}], 'id': 1, 'name': 'Dunn'}, {'friends': [{'friends': [{'friends': [...], 'id': 0, 'name': 'Hero'}, {...}, {...}], 'id': 1, 'name': 'Dunn'}, {...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {'friends': [{...}, {...}], 'id': 7, 'name': 'Devin'}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 6, 'name': 'Hicks'}, {'friends': [{...}, {'friends': [{'friends': [{...}, {...}], 'id': 6, 'name': 'Hicks'}, {...}, {'friends': [{...}], 'id': 9, 'name': 'Klein'}], 'id': 8, 'name': 'Kate'}], 'id': 7, 'name': 'Devin'}], 'id': 5, 'name': 'Clive'}], 'id': 4, 'name': 'Thor'}], 'id': 3, 'name': 'Chi'}], 'id': 2, 'name': 'Sue'}]")
 bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce bruce 
Counter({0: 2, 5: 1})
()
######################
#
# FINDING KEY CONNECTORS
#
######################
()
('total connections', 24)
('number of users', 11)
('average connections', 2)
()
users sorted by number of friends:
[(1, 3), (2, 3), (3, 3), (5, 3), (8, 3), (0, 2), (4, 2), (6, 2), (7, 2), (9, 1), (10, 0)]
()
######################
#
# DATA SCIENTISTS YOU MAY KNOW
#
######################
()
('friends of friends bad for user 0:', [0, 2, 3, 0, 1, 3])
('friends of friends for user 3:', Counter({0: 2, 5: 1}))
()
######################
#
# SALARIES AND TENURES
#
######################
()
('average salary by tenure', {6.5: 69000, 7.5: 76000, 6: 76000, 10: 83000, 8.1: 88000, 4.2: 63000, 8.7: 83000, 0.7: 48000, 1.9: 48000, 2.5: 60000})
('average salary by tenure bucket', {'more than five': 79166, 'between two and five': 61500, 'less than two': 48000})
()
######################
#
# MOST COMMON WORDS
#
######################
()
('learning', 3)
('java', 3)
('python', 3)
('big', 3)
('data', 3)
('hbase', 2)
('regression', 2)
('cassandra', 2)
('statistics', 2)
('probability', 2)
('hadoop', 2)
('networks', 2)
('machine', 2)
('neural', 2)
('scikit-learn', 2)
('r', 2)



In [ ]:



In [ ]: