Quick Guide to the basics:
myDict1 = { 'one':'first thing', 'two':'secondthing' }
myDict2 = { 1:43, 2:600, 3:-1000.4 }
myDict3 = { 1:"text", 2:345, 3:'another value' }
myDict1['newVal'] = 'something stupid'
myDict1 = { 'one':'first thing', 'two':'secondthing', 'newVal':'something stupid' }
del myDict1['newVal']
{ 'one':'first thing', 'two':'secondthing' }
In This Document:
In [35]:
# Adapted from Exercise 39 of "Learn Python the Hard Way":
#   https://learnpythonthehardway.org/book/ex39.html
# Edited, expanded, and ported to Python 3.x by Mitch for this notebook.

# Map each state name to its abbreviation.
states = {
    'Oregon': 'OR',
    'Florida': 'FL',
    'California': 'CA',
    'New York': 'NY',
    'Michigan': 'MI',
}

# Map a few state abbreviations to one city in that state.
cities = {
    'CA': 'San Francisco',
    'MI': 'Detroit',
    'FL': 'Jacksonville',
}

# Item assignment adds new key/value pairs to an existing dictionary.
cities['NY'] = 'New York'
cities['OR'] = 'Portland'

# Look up individual cities by abbreviation.
print('-' * 10)
print("Two cities:")
print("NY State has: %s" % cities['NY'])
print("OR State has: %s" % cities['OR'])

# Look up individual abbreviations by state name.
# (Python 2.7 form in the original exercise:
#      print "Michigan's abbreviation is: ", states['Michigan'])
print('-' * 10)
print("Abbreviations for Two States:")
print("Michigan's abbreviation is: %s" % states['Michigan'])
print("Florida's abbreviation is: %s" % states['Florida'])

# Chain the two dictionaries: state name -> abbreviation -> city.
print('-' * 10)
print("State Abbreviation extracted from cities dictionary:")
print("Michigan has: %s" % cities[states['Michigan']])
print("Florida has: %s" % cities[states['Florida']])

# Walk every (state, abbreviation) pair.
print('-' * 10)
print("Every State Abbreviation:")
for state, abbrev in states.items():
    print("%s is abbreviated %s" % (state, abbrev))

# Walk every (abbreviation, city) pair.
print('-' * 10)
print("Every city in Every State:")
for abbrev, city in cities.items():
    print("%s has the city %s" % (abbrev, city))

# Combine both dictionaries inside a single loop.
print('-' * 10)
print("Do Both at Once:")
for state, abbrev in states.items():
    print("%s state is abbreviated %s and has city %s" % (
        state, abbrev, cities[abbrev]))
print('-' * 10)
In [36]:
# Exercise 39 ("Learn Python the Hard Way"), modified code continued:
# dict.get() returns None (or a supplied default) for a missing key
# instead of raising KeyError.

# Safely look up an abbreviation for a state that might not be present.
state = states.get('Texas')
if not state:
    print("Sorry, no Texas.")

# Look up a city, supplying a default for a missing key.
city = cities.get('TX', 'Does Not Exist')
print("The city for the state 'TX' is: %s" % city)

# Chain the lookups (state name -> abbreviation -> city) so the printed value
# really is a city; states.get('Florida') on its own is only the abbreviation.
print("The city for the state 'FL' is: %s" % cities.get(states.get('Florida')))

# 'Hawaii' is absent from states, so the inner .get() yields None and the
# outer .get(None) yields None as well -- no exception at either step.
city2 = cities.get(states.get('Hawaii'))
print("city2: %s" % city2)

# Classify *why* no usable city came back. A city that was actually found
# falls through every branch and is kept unchanged.
if city2 is None:
    city2 = 'Value == None'
elif city2 == '':
    city2 = 'Value is empty ""'
elif not city2:
    city2 = 'Value Missing (Passed not test)'
print("The city for the state 'HI' is: %s" % city2)
print("These commands used .get() to safely retrieve a value")
In [37]:
# Follow-up to the Learn Python the Hard Way code above:
# `not city2` is False whenever city2 holds a non-empty string.
print(not city2)
# A lookup that raises instead of printing: the states dictionary has no integer keys,
# so states[1] is a KeyError (dictionaries are indexed by key, not by position).
# print(states[1][1])
In [38]:
# What happens when the keys in a dict literal are not unique?
# ('fruit' is deliberately listed twice.)
foods = {
    'fruit': 'banana',
    'fruit': 'apple',
    'meat': 'beef',
}
for foodType, indivFood in foods.items():
    print("%s includes %s" % (foodType, indivFood))
# Answer: no duplicate is stored -- the second use of a key overwrites the first value.

# Remove an entry from the dictionary.
del foods['meat']

# Add new entries.
foods['vegetables'] = 'carrot'
foods['meats'] = 'chicken'

# Replace the value stored under an existing key.
foods['vegetables'] = 'corn'

foods
Out[38]:
In [39]:
# Adapted from the MIT Big Data class.
# Associative arrays are called "dictionaries" (or "maps") in Python:
# every value is stored under a key that is used to find it -- {key: value}.
super_heroes = {
    'Spider Man': 'Peter Parker',
    'Super Man': 'Clark Kent',
    'Wonder Woman': 'Diana Prince',
    'The Flash': 'Barry Allen',
    'Professor X': 'Charles Xavier',
    'Wolverine': 'Logan',
}

# Size of the dictionary and a single lookup by key.
print("%s %s" % ("len(super_heroes):", len(super_heroes)))
print("%s %s" % ("Secret Identity for The Flash:", super_heroes['The Flash']))

# Deleting an entry shrinks the dictionary by one.
del super_heroes['Wonder Woman']
print("%s %s" % ("len(super_heroes):", len(super_heroes)))
print(super_heroes)

# Assigning to an existing key replaces its value; .get() reads it back.
super_heroes['Wolverine'] = 'John Logan'
print("Secret Identity for Wolverine:", super_heroes.get("Wolverine"))

# Views over all keys and all values.
print("Keys ... then Values (for super_heroes):")
print(super_heroes.keys())
print(super_heroes.values())
In [41]:
# A list whose elements are dictionaries: the existing foods dict plus a new literal.
FoodList = [
    foods,
    {'meats': 'beef', 'fruit': 'banana', 'vegetables': 'broccoli'},
]
# Each element is reached by its position in the list.
print(FoodList[0], FoodList[1], sep='\n')
In [42]:
# A dictionary whose values are themselves containers (a "nested dictionary").
# Note: this is only an illustration. FoodList already contains foods, so real
# code would probably not store both. Where possible, also prefer a uniform
# structure (the same number of levels under every key).
nestedDict = dict(
    heroes=super_heroes,
    foods=foods,
    complex_foods=FoodList,
)
print(nestedDict['heroes'])
print('-' * 72)
print(nestedDict['complex_foods'])
This section has additional resources for working with dictionaries and nested dictionaries:
In [43]:
# Reference for the collections module (Counter, OrderedDict, deque, ...):
# https://docs.python.org/3/library/collections.html
# Before Python 3.7 a plain dict did not guarantee any iteration order;
# OrderedDict always iterates in the order its keys were first inserted.
from collections import OrderedDict

# Seed from a list of (key, value) pairs rather than a dict literal: on older
# Python versions a dict literal can lose its written order before OrderedDict
# ever sees it.
myOrdDict = OrderedDict([('banana', 3), ('apple', 4), ('pear', 1), ('orange', 2)])
print(myOrdDict)

# Each new key is appended after the existing ones.
myOrdDict['pork belly'] = 7
print(myOrdDict)
myOrdDict['sandwich'] = 5
print(myOrdDict)
myOrdDict['hero'] = 5
print(myOrdDict)
In [45]:
# Sorting an OrderedDict: sort its (key, value) pairs, then rebuild from the result.

# Sorted by key -- the result replaces the original myOrdDict.
myOrdDict = OrderedDict(
    sorted(myOrdDict.items(), key=lambda pair: pair[0])
)
print("myOrdDict (sorted by key):\n %s" % myOrdDict)

# Sorted by value.
myOrdDict2 = OrderedDict(
    sorted(myOrdDict.items(), key=lambda pair: pair[1])
)
print("myOrdDict2 (sorted by value):\n %s" % myOrdDict2)

# Sorted by the length of the key string.
myOrdDict3 = OrderedDict(
    sorted(myOrdDict.items(), key=lambda pair: len(pair[0]))
)
print("myOrdDict3 (sorted by length of key):\n %s" % myOrdDict3)
In [46]:
# Patterns being tried out:
#     collections.OrderedDict(sorted(dictionary.items(), reverse=True))
#     pd.Series(OrderedDict(sorted(browser.items(), key=lambda v: v[1])))
# Here: rebuild with the (key, value) pairs sorted in descending order.
# Note that this rebinds myOrdDict3.
myOrdDict3 = OrderedDict(
    sorted(myOrdDict.items(), reverse=True)
)
print("myOrdDict3 (reverse key sort):\n %s" % myOrdDict3)
In [12]:
# Author's note: the strategy above usually works, but cases were encountered
# where it failed for no known reason; sorting with an explicit key function
# may be more reliable.
import pandas as pd

# Value-sorted pairs wrapped in a pandas Series (so myOrdDict4 is a Series, not a dict).
myOrdDict4 = pd.Series(
    OrderedDict(sorted(myOrdDict.items(), key=lambda pair: pair[1]))
)
print("myOrdDict4 (value sort / alternate method):\n %s" % myOrdDict4)
In [13]:
# Value sort in descending order: negate the value in the sort key,
# and break ties between equal values by ascending key.
myOrdDict5 = OrderedDict(
    sorted(myOrdDict.items(), key=lambda pair: (-pair[1], pair[0]))
)
print("myOrdDict5 (sorted by value in reverse order):\n %s" % myOrdDict5)
In [14]:
# Reference for the collections module (Counter, OrderedDict, deque, ...):
# https://docs.python.org/3/library/collections.html
# The documentation's sample tallies a list of words one element at a time:
#     for word in ['red', 'blue', 'red', 'green', 'blue', 'blue']:
#         cnt[word] += 1
from collections import Counter

# Counter tallies any iterable handed to its constructor, so no explicit loop
# is needed. Here it counts how many keys of myOrdDict share each value.
cnt = Counter(myOrdDict.values())
print(cnt)
In [15]:
# Stack Overflow thread on sorting dictionaries in Python 3:
#     http://stackoverflow.com/questions/11089655/sorting-dictionary-python-3
# It includes another approach proposed in 2013 (which may have been newer
# than OrderedDict at the time).
#
# That help topic recommends:
#     pip install sortedcontainers
# Then:
#     from sortedcontainers import SortedDict
#     myDic = SortedDict({10: 'b', 3:'a', 5:'c'})
#     sorted_list = list(myDic.keys())
print("conda install sortedcontainers is available in Python 2.7 and 3.6 as of April 2017")
In [16]:
# Some dictionaries to work with in the sorting experiments that follow.
# Display super_heroes (built in an earlier cell) as this cell's output.
super_heroes # created earlier
Out[16]:
In [17]:
# Add one more hero via item assignment.
super_heroes['The Incredible Hulk'] = 'Bruce Banner'
In [18]:
# Display the dictionary again after the insertion.
# NOTE(review): the original author observed the displayed keys looking
# alphabetically sorted; that is presumably a property of how the notebook
# renders dicts rather than of the dict itself -- confirm on the
# Python/IPython version in use.
super_heroes # seems to alpha sort on keys anyway
Out[18]:
In [48]:
# Quick case study of another way to reverse-sort (from Stack Overflow):
# reverse whatever order the dictionary currently iterates in.
reversed_tst = OrderedDict(reversed(list(super_heroes.items())))
# Author's note: in this instance the result was not what was expected,
# so this approach might not be advisable.
# NOTE(review): presumably because nothing here sorts the items first, the
# outcome depends entirely on the dict's existing iteration order -- confirm.
reversed_tst
Out[48]:
In [51]:
# Combining the two techniques: sort the pairs by value first, then reverse.
reversed_tst = OrderedDict(
    reversed(sorted(super_heroes.items(), key=lambda pair: pair[1]))
)
reversed_tst  # the values now run in reverse (descending) order
Out[51]:
In [52]:
# Same combination, but sorting the (key, value) pairs themselves before
# reversing, which orders the result by key.
reversed_tst = OrderedDict(
    reversed(sorted(super_heroes.items()))
)
reversed_tst  # the keys now run in reverse (descending) order
Out[52]:
In [19]:
# Integer-keyed dictionary, deliberately written out of key order.
fruitDict = {
    3: 'banana',
    4: 'pear',
    1: 'apple',
    2: 'orange',
}
# Author's note: dictionaries appeared to be sorted at least on output,
# making it hard to spot the effects demonstrated below.
fruitDict
Out[19]:
In [20]:
# sortedcontainers documentation for SortedDict:
#     http://www.grantjenks.com/docs/sortedcontainers/sorteddict.html
# Try the sample code from the Stack Overflow post.
from sortedcontainers import SortedDict

myDic = SortedDict({10: 'b', 3: 'a', 5: 'c'})
sorted_list = list(myDic.keys())
print(myDic, sorted_list, sep='\n')
In [21]:
# Rebind fruitDict to a SortedDict built from the plain dict. From this point on
# the name fruitDict refers to the SortedDict.
fruitDict = SortedDict(fruitDict)
sorted_list = list(fruitDict.keys())
print(fruitDict, sorted_list, sep='\n')
So when to do what?
**Final note:** only `SortedDict` allows indexing by numerical position on the data (bypassing keys) under both Python 2.7 and 3.6, as shown in the next section.
In [40]:
# The MIT Big Data class demonstrated this kind of positional access to a
# dictionary in a Python 2.7 notebook. Failures are caught and printed here so
# the notebook does not halt when run under Python 3.6.
def print_1st_keyValue(someDict):
    """Print the first value, then the first key, of ``someDict`` using ``[0]``.

    ``someDict.values()[0]`` and ``someDict.keys()[0]`` are attempted one after
    the other. When an attempt raises, the exception's type and message are
    printed in place of the element. The key attempt runs even if the value
    attempt fails.

    Author's notes: plain indexing of these views only works in Python 2.7.
    Under Python 3.6 the printed errors were
    ``<class 'TypeError'>: 'dict_values' object does not support indexing`` and
    ``<class 'TypeError'>: 'dict_keys' object does not support indexing``.
    """
    def _show_first(fetch_view):
        # Evaluate the view and index it inside the try, so a failure in
        # either step is reported rather than raised.
        try:
            print(fetch_view()[0])
        except Exception as err:
            print(str(type(err)) + ": " + str(err))

    try:
        _show_first(lambda: someDict.values())
    finally:
        _show_first(lambda: someDict.keys())
# Try positional access on the plain dictionary first.
print_1st_keyValue(super_heroes)
In [44]:
# Run the same positional-access test on the ordered dictionary.
# Author's observation: this failed in Python 3.6 and worked in Python 2.7.
# Reminder: the syntax attempted is orderedDict.values()[0], orderedDict.keys()[0].
print_1st_keyValue(myOrdDict)
In [22]:
# Run the same positional-access test on the sorted dictionary.
# Author's observation: this works in both Python 3.6 and 2.7.
# Reminder: the syntax attempted is sortedDict.values()[0], sortedDict.keys()[0].
print_1st_keyValue(fruitDict)
In [23]:
# Comprehension over a dictionary. Note this one builds a *list*:
# iterating fruitDict yields its keys, and only keys greater than 2 are kept.
[ k for k in fruitDict if k > 2 ]
Out[23]:
In [24]:
# List of the values stored under every key greater than 1.
[ fruitDict[k] for k in fruitDict if k > 1 ]
Out[24]:
In [25]:
# Dictionary comprehension: build a new dict from fruitDict, keeping entries
# whose key is above 1 and whose value has at least 6 characters. Each key is
# doubled and each value is prefixed with 'fruit - '.
newDict = {
    key * 2: 'fruit - ' + name
    for key, name in fruitDict.items()
    if key > 1 and len(name) >= 6
}
print(newDict)
type(newDict)
Out[25]:
In [26]:
class KeyDict(dict):
    """A dict whose lookup of a missing key returns the key itself.

    ``dict`` calls ``__missing__`` when ``d[key]`` does not find ``key``.
    This override hands the key straight back and does not store it.
    """

    def __missing__(self, key):
        # To also add the key on a miss (with the key as its own value),
        # put ``self[key] = key`` before the return.
        return key
# Wrap the existing super_heroes data in a KeyDict and look up two keys:
# 'The Incredible Hulk' and 'Ant Man'. A key that is not found comes back
# as itself, per the __missing__ override.
kdTst = KeyDict(super_heroes)
print(kdTst['The Incredible Hulk'], kdTst['Ant Man'], sep='\n')
In [27]:
# Print the interactive help text for the SortedDict class.
help(SortedDict)
In [ ]: