Scrapin' the Webz


In [4]:
!pip3 install bs4


Requirement already satisfied (use --upgrade to upgrade): bs4 in /usr/local/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): beautifulsoup4 in /usr/local/lib/python3.5/site-packages (from bs4)

In [5]:
from bs4 import BeautifulSoup

In [6]:
from urllib.request import urlopen
html_str = urlopen("http://static.decontextualize.com/kittens.html").read()

In [7]:
print(html_str)


b'<!doctype html>\n<html>\n\t<head>\n\t\t<title>Kittens!</title>\n\t\t<style type="text/css">\n\t\t\tspan.lastcheckup { font-family: "Courier", fixed; font-size: 11px; }\n\t\t</style>\n\t</head>\n\t<body>\n\t\t<h1>Kittens and the TV Shows They Love</h1>\n\t\t<div class="kitten">\n\t\t\t<h2>Fluffy</h2>\n\t\t\t<div><img src="http://placekitten.com/120/120"></div>\n\t\t\t<ul class="tvshows">\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0106145/">Deep Space Nine</a>\n\t\t\t\t</li>\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0088576/">Mr. Belvedere</a>\n\t\t\t\t</li>\n\t\t\t</ul>\n\t\t\tLast check-up: <span class="lastcheckup">2014-01-17</span>\n\t\t</div>\n\t\t<div class="kitten">\n\t\t\t<h2>Monsieur Whiskeurs</h2>\n\t\t\t<div><img src="http://placekitten.com/110/110"></div>\n\t\t\t<ul class="tvshows">\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0106179/">The X-Files</a>\n\t\t\t\t</li>\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0098800/">Fresh Prince</a>\n\t\t\t\t</li>\n\t\t\t</ul>\n\t\t\tLast check-up: <span class="lastcheckup">2013-11-02</span>\n\t\t</div>\n\t</body>\n</html>\n\n'

In [8]:
document = BeautifulSoup(html_str,"html.parser")

In [9]:
type(document)


Out[9]:
bs4.BeautifulSoup

In [10]:
h1_tag = document.find('h1')

In [11]:
h1_tag.string


Out[11]:
'Kittens and the TV Shows They Love'

In [12]:
img_tag = document.find('img')

In [13]:
img_tag.string

In [14]:
img_tag('src')


Out[14]:
[]

In [15]:
img_tag['src']


Out[15]:
'http://placekitten.com/120/120'

In [16]:
document.find_all('img')


Out[16]:
[<img src="http://placekitten.com/120/120"/>,
 <img src="http://placekitten.com/110/110"/>]

In [17]:
img_tags=document.find_all('img')

In [18]:
type(img_tags)


Out[18]:
bs4.element.ResultSet

In [19]:
first_img = img_tags[0]

In [20]:
first_img['src']


Out[20]:
'http://placekitten.com/120/120'

In [21]:
second_img = img_tags[1]

In [22]:
second_img['src']


Out[22]:
'http://placekitten.com/110/110'

In [23]:
for item in img_tags:
    print(item['src'])


http://placekitten.com/120/120
http://placekitten.com/110/110

In [24]:
h2_tags = document.find_all('h2')
for item in h2_tags:
    print(item.string)


Fluffy
Monsieur Whiskeurs

In [25]:
checkups = document.find_all('span',{'class':'lastcheckup'})
for item in checkups:
    print(item.string)


2014-01-17
2013-11-02

In [26]:
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    checkup = item.find('span')
    print(checkup.string)


Fluffy
2014-01-17
Monsieur Whiskeurs
2013-11-02

In [27]:
kittens = document.find_all('div', {'class': 'kitten'})

In [28]:
first_kitten = kittens[0]
first_kitten_h2 = first_kitten.find('h2')
print(first_kitten_h2.string)


Fluffy

In [29]:
planets = ["Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"]

In [30]:
separator = ","

In [31]:
separator.join(planets)


Out[31]:
'Mercury,Venus,Earth,Mars,Jupiter,Saturn,Uranus,Neptune'

But first, an aside about joining strings


In [32]:
print("&\n".join(planets))


Mercury&
Venus&
Earth&
Mars&
Jupiter&
Saturn&
Uranus&
Neptune

In [33]:
print("&\n".join(planets[:4]))


Mercury&
Venus&
Earth&
Mars

In [34]:
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    a_tags = item.find_all('a') #anchor tag, ancestor
    all_shows_str = [] #create a new list
    for a_tag_item in a_tags:
        #print("-", a_tag_item.string)
        tag_str = a_tag_item.string
        all_shows_str.append(tag_str)
        string_with_all_show_names = ",".join(all_shows_str)  
        print(h2_tag.string+ ":", string_with_all_show_names)


Fluffy
Fluffy: Deep Space Nine
Fluffy: Deep Space Nine,Mr. Belvedere
Monsieur Whiskeurs
Monsieur Whiskeurs: The X-Files
Monsieur Whiskeurs: The X-Files,Fresh Prince

In [35]:
kittens_data = list()#create an empty list
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    a_tags = item.find_all('a') #anchor tag, ancestor
    all_shows_str = [] #create a new list
    for a_tag_item in a_tags:
        #print("-", a_tag_item.string)
        tag_str = a_tag_item.string
        all_shows_str.append(tag_str)
        #1 create a dictionary and add to it the relevant key/value pairs
        #kitten_map = {}
        #kitten_map["name"] = h2_tag.string
        #kitten_map["tvshows"] = all_shows_str
        kitten_map = {"name":h2_tag.string, "tvshows":all_shows_str }
        #2 append that dictionary to the kittens_data
        string_with_all_show_names = ",".join(all_shows_str)  
        #print(h2_tag.string+ ":", string_with_all_show_names
kittens_data


Fluffy
Monsieur Whiskeurs
Out[35]:
[]

In [36]:
kittens_data = list()#create an empty list
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    a_tags = item.find_all('a') #anchor tag, ancestor
    all_shows_str = []
    for a_tag_item in a_tags:
        tag_str = a_tag_item.string
        all_shows_str.append(tag_str)
    #create a dictionary adding kittens checkups
    checkup = item.find('span')# get the string with checkup.string
    kittens_data.append(
              {"name":h2_tag.string, 
                        "tvshows":all_shows_str,
                        "last_checkup": checkup.string})
kittens_data


Fluffy
Monsieur Whiskeurs
Out[36]:
[{'last_checkup': '2014-01-17',
  'name': 'Fluffy',
  'tvshows': ['Deep Space Nine', 'Mr. Belvedere']},
 {'last_checkup': '2013-11-02',
  'name': 'Monsieur Whiskeurs',
  'tvshows': ['The X-Files', 'Fresh Prince']}]

Another Aside: lists and ...lists


In [37]:
Our next goal is to create a data structure that looks like this: 
    [
        {'name': 'Fluffy',
          'tv shows': ['Deep Space Nine', 'Mr.Belvedere']},
        {}


  File "<ipython-input-37-50d1845d425b>", line 1
    Our next goal is to create a data structure that looks like this:
           ^
SyntaxError: invalid syntax

In [38]:
x = ["a", "b", "c", "d"]

In [39]:
x[0]


Out[39]:
'a'

In [40]:
x.append("e")

In [41]:
len(x)


Out[41]:
5

In [42]:
x[4]


Out[42]:
'e'

In [43]:
numbers = [1,2,3,4,5,6]
# end up with: [1,4,9,16,25,36]

In [44]:
squared = [item * item for item in numbers]
for item in numbers:
    s = item*item
    squared.append(s)

In [45]:
squared


Out[45]:
[1, 4, 9, 16, 25, 36, 1, 4, 9, 16, 25, 36]

In [46]:
## Aside the Third: Making dictionaries
#declaring a dictionary
x = {'a':1, 'b':2, 'c':3}

In [47]:
#get a value out of a dictionary
x['a']


Out[47]:
1

In [48]:
x.keys()


Out[48]:
dict_keys(['b', 'c', 'a'])

In [49]:
for key in x.keys():
    print(key) #print out keys


b
c
a

In [50]:
# target: {1:1, 2:4, 3:9, 4:16, 5:25,...}
squares = {}
for n in range(1,11):
    squares[n] = n*n
squares


Out[50]:
{1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81, 10: 100}

In [51]:
squares[7]


Out[51]:
49

In [52]:
names = ["Aaron", "Bob", "Caroline", "Daphne"]
#target: {"Aaron": 5} #show the name and how many characters each name has
name_length_map = {}#map is 
for item in names:
    name_length_map[item] = len(item)
name_length_map #evaluate the dictionary, Python 3 # take a list and create a new dictionary


Out[52]:
{'Aaron': 5, 'Bob': 3, 'Caroline': 8, 'Daphne': 6}

Scraping the Faculty, how many percentage of the CJ faculty are adjunct faculty

our hypothesis: find all the

  • tags
  • inside the
  • tags, find the

    inside the

    , the name is the content of an

    inside the
  • tags, find the

    with class description ** the title of the professor is the content of that tag.

  • 
    
    In [53]:
    from urllib.request import urlopen
    faculty_html = urlopen("http://www.journalism.columbia.edu/page/10/10?category_ids%5B%5D=2&category_ids%5B%5D=3&category_ids%5B%5D=37").read()
    
    
    
    In [54]:
    document = BeautifulSoup(faculty_html, "html.parser")
    
    
    
    In [55]:
    document.find('h2').string
    
    
    
    
    Out[55]:
    ' Full-Time, Adjunct & Visiting Faculty'
    
    
    In [56]:
    h2_tag = document.find('h2')
    h2_tag.string
    
    
    
    
    Out[56]:
    ' Full-Time, Adjunct & Visiting Faculty'

    very first task: print out the names of all the faculty members.

    
    
    In [57]:
    # this doesn't work, 
    ul_tag = document.find('ul', {'class': 'experts-list'})
    li_tags= ul_tag.find_all('li')
    for item in li_tags:
        h4_tag = item.find('h4')
        if h4_tag: #none counts as false in python, only proceed if we actually found a h4-tag under li tags
            a_tag = h4_tag.find('a')#name of adjunct
            p_tag = item.find('p', {'class':'description'})#position of adjunct
            print(a_tag.string, "/", p_tag.string)
    
    
    
    
    Adkison, Abbey  / Assistant Director, Multi-Media Journalism
    Alarcón, Daniel / Assistant Professor of Broadcast Journalism
    Barclay, Dolores  / Adjunct Faculty
    Baum, Geraldine / Adjunct Faculty
    Bell, Emily / Professor of Professional Practice & Director, Tow Center for Digital Journalism
    Benedict, Helen  / Professor
    Bennet, John  / Adjunct Faculty
    Bennett, Rob / Adjunct Faculty
    Berman, Nina / Associate Professor
    Blair, Gwenda  / Adjunct Faculty
    Blum, David  / Adjunct Faculty
    Bockelman, Matt / None
    Bodarky, George / Adjunct Assistant Professor 
    Bogdanich, Walt  / Adjunct Faculty
    Bourin, Lennart / Adjunct Faculty
    Bradley, Theresa / Adjunct Faculty
    Brainard, Curtis  / Staff Writer
    Bruder, Jessica / Adjunct Faculty
    Burford, Melanie  / Adjunct Faculty
    Burleigh, Nina  / Adjunct Faculty
    Cabot, Heather / Adjunct Professor
    Cabral, Elena  / Adjunct Faculty & Assistant Director, Student Services
    Canipe, Chris / None
    Casciato, Tom / Adjunct Faculty
    Cohen, Julie / Adjunct Faculty
    Cohen, Lisa R. / Director, duPont/Professional Prizes; Adjunct Associate Professor
    Cohen, Sarah / Adjunct Faculty
    Coll, Steve / Dean & Henry R. Luce Professor of Journalism
    Cooper, Ann / CBS Professor of Professional Practice in International Journalism
    Coronel, Sheila  / Toni Stabile Professor of Professional Practice in Investigative Journalism; Director, Toni Stabile Center for Investigative Journalism, and Dean of Academic Affairs
    Coyne , Kevin  / Adjunct Faculty
    Cross, June  / Professor 
    Cunningham, Brent  / Deputy Editor
    DePalma, Anthony / Adjunct Faculty
    Deitsch, Richard / Adjunct Faculty
    Diamond, Becky / None
    Dinges, John / Godfrey Lowell Cabot Professor Emeritus
    Donahue, Kerry  / Adjunct Faculty & Director, Radio Program
    Drew, Christopher  / Adjunct Faculty
    Edsall, Thomas B.  / None
    Einhorn, Cheryl / Adjunct Faculty
    Elliott, Justin  / Adjunct Assistant Professor
    Epstein, Randi Hutter  / Adjunct Faculty 
    Evans, Farrell  / Adjunct Faculty
    Ford, Constance Mitchell  / Adjunct Faculty
    Freedman, Samuel  / Professor
    Freeman, George  / Adjunct Faculty
    French, Howard  / Associate Professor
    Fried, Stephen  / Adjunct Faculty
    Garcia, Mario / Senior Adviser on News Design/Adjunct Professor
    Gezari, Vanessa / None
    Gilderman, Greg / Adjunct Faculty
    Gitlin, Todd / Professor & Chair, Ph.D. Program
    Giudice, Barbara  / Adjunct Faculty
    Goldensohn, Marty / Adjunct Faculty
    Goldman, Ari  / Professor
    Goldstein, Jacob / Adjunct Professor
    Grueskin, Bill / Professor of Professional Practice 
    Haburchak, Alan / Adjunct Faculty
    Hajdu, David  / Associate Professor 
    Hancock, LynNell / H. Gordon Garbedian Professor of Journalism & Director, Spencer Fellowship Program
    Hansen, Mark / Director, David and Helen Gurley Brown Institute for Media Innovation & Professor of Journalism 
    Harris, Mark / Adjunct Faculty
    Hartenstein, Julie / Associate Dean
    Heinzerling, Larry / Adjunct Faculty
    Herman, Tom  / Adjunct Faculty
    Hickey, Neil  / Adjunct Faculty
    Hoel, Lars  / None
    Hogan, Pamela / Adjunct Faculty
    Holloway, Marguerite  / Associate Professor of Professional Practice and Director, Science & Environmental Journalism
    Hoyt, Michael  / Adjunct Faculty
    Isabel, Lonnie / Senior Lecturer in Discipline
    Jennings, Tom / Adjunct Faculty
    John, Richard R.  / Professor of History and Communications
    Jones, Matthew L.  / Instructor, The Lede Program
    Kann, Peter R.  / Adjunct Faculty
    Kantrowitz, Barbara  / Adjunct Professor
    Karle, Stuart / Adjunct Faculty; William J. Brennan Jr. Visiting Professor of First Amendment Issues
    Karr, Rick / Adjunct Faculty
    Kellogg, David / Adjunct Faculty
    Kennedy, Lucy / Adjunct Faculty
    Kent, Thomas  / Adjunct Faculty
    Klatell, David / Professor of Professional Practice & Chair, International Studies
    Klein, Adam / Adjunct Professor
    Kleman, Kim  / Adjunct Faculty
    Knee, Jonathan / Adjunct Professor
    Konner, Joan / Dean Emerita
    Kozar, Matt / Adjunct Faculty
    Lehmann-Haupt, Christopher  / Adjunct Faculty
    Lemann, Nicholas / Joseph Pulitzer II and Edith Pulitzer Moore Professor of Journalism; Dean Emeritus
    Levenson, Jacob  / Adjunct Faculty
    Lipsky, Seth  / Adjunct Faculty
    Lombardi, Kristen / Adjunct Faculty
    Luhby, Tami / Adjunct Faculty
    Maciulis, Tony / Adjunct Faculty
    Maharidge, Dale  / Professor 
    Mason, Tom / None
    Matloff, Judith  / Adjunct faculty
    Maytal, Itai / Adjunct Faculty
    McCormick, David  / Adjunct Faculty
    McCray, Melvin / Adjunct Faculty
    McDonald, Erica / None
    McGregor, Susan E. / Assistant Professor & Assistant Director, Tow Center for Digital Journalism
    Mencher, Melvin / Professor Emeritus
    Merchant, Preston / None
    Mintz, James / Adjunct Faculty
    Morais, Betsy / Adjunct Faculty
    Nasar, Sylvia  / John S. and James L. Knight Professor of Business Journalism
    Navasky, Victor / George T. Delacorte Professor in Magazine Journalism Emeritus
    Newman, Maria / Adjunct Faculty
    Nisenholtz, Martin / Adjunct Professor 
    Norton, Rob / Adjunct Faculty
    Nosheen, Habiba / Adjunct Professor
    Ornstein, Charles / Adjunct Faculty
    Padawer , Ruth / Adjunct Professor
    Parker, Diantha / Adjunct Faculty
    Parrish, Allison  / Instructor, The Lede Program
    Patel, Samir S. / Adjunct Faculty
    Paunescu, Delia  / None
    Perlman, Merrill / Adjunct Faculty
    Pollak, Lisa / Adjunct Faculty
    Pool-Eckert, Marquita / Adjunct Faculty
    Richardson, Lynda  / Adjunct Faculty
    Richardson, Whitney  / Adjunct Professor
    Richman, Joe / Adjunct Faculty
    Robbins, Ed / Adjunct Faculty
    Roberts, Fletcher / Adjunct Faculty
    Sacha, Bob  / Adjunct Faculty
    Sataline, Suzanne / Adjunct Faculty
    Schapiro, Rich / Adjunct Faculty
    Schatz, Robin / Adjunct Faculty
    Schecter, B.J. / Adjunct Faculty
    Schellmann, Hilke / Adjunct Faculty
    Schoen, John / Adjunct Faculty
    Schoonmaker, Mary Ellen / Adjunct Faculty
    Schudson, Michael  / Professor 
    Schumacher-Matos, Ed  / Adjunct Faculty
    Schwartz, Jack  / None
    Seave, Ava  / Adjunct Faculty
    Segnini, Giannina  / Director of the Data Concentration Program
    Shanor, Donald / G. L. Cabot Professor Emeritus 
    Shapiro, Bruce / Executive Director, Dart Center for Journalism and Trauma and Senior Advisor for Academic Affairs
    Shapiro, Michael  / Professor 
    Shihab-Eldin, Ahmed  / Adjunct Assistant Professor 
    Sicha, Choire / Adjunct Professor
    Siegel, Lloyd / Adjunct Faculty
    Singer, Amy  / Adjunct Faculty
    Sliwa, Maria / Adjunct Faculty
    Solomon, Alisa / Professor & Director, Arts Concentration, M.A. Program
    Soma, Jonathan / Director, The Lede Program
    Sotomayor, Ernest / Dean of Student Affairs & Director, Latin American Initiatives
    Span, Paula  / Adjunct Professor
    Spanninger, Martha / Adjunct Faculty
    Stabiner, Karen / Adjunct Faculty
    Stewart, James / Bloomberg Professor of Business Journalism
    Stille, Alexander / San Paolo Professor of International Journalism
    Subramanian, Sushma / Adjunct Faculty
    Surowicz, Simon / None
    Templin, Jacob / None
    Tenen, Dennis / Instructor, The Lede Program
    Topping, Seymour   / San Paolo Professor of International Journalism Emeritus
    Trivedi, Yogi  / Adjunct Professor
    Tsiantar, Dody  / Adjunct Faculty
    Tu, Duy Linh / Associate Professor of Professional Practice & Director, Digital Media Program 
    Tucher, Andie  / Associate Professor; Director, Ph.D. Program
    Ventura, Michael / Adjunct Faculty
    Wald, Jonathan / Adjunt Faculty
    Wald, Richard / Fred W. Friendly Professor of Professional Practice in Media Society Emeritus
    Wayne, Leslie / Adjunct Faculty
    Weiner, Jonathan  / Maxwell M. Geffen Professor of Medical and Scientific Journalism 
    West, Betsy  / Fred W. Friendly Professor of Professional Practice in Media and Society
    Wheatley, Jr., William / Adjunct Faculty
    Wiggins, Chris / Instructor, The Lede Program
    Wilson, Duff / Adjunct Faculty
    Woodward, Tali  / Director, M.A. Program
    Wu, Tim / Director of the Saul and Janice Poliak Center for the Study of First Amendment Issues
    Yu, Frederick T C. / CBS Professor Emeritus International Journalism
    Zucker, John / Adjunct Faculty
    Zuckerman, Jocelyn Craugh  / Adjunct Faculty
    

    Now, we want to make a list of dictionaries of faculty members along with their titles [{'name': 'Bodarky George', 'title': 'Adjunct Assistant Professor '}, {'name':''}]

    
    
    In [58]:
    profs = []
    ul_tag = document.find('ul', {'class': 'experts-list'})
    li_tags= ul_tag.find_all('li')
    for item in li_tags:
        h4_tag = item.find('h4')
        if h4_tag: #none counts as false in python, only proceed if we actually found a h4-tag under li tags
            a_tag = h4_tag.find('a')
            p_tag = item.find('p', {'class':'description'})
            prof_map = {'name': a_tag.string, 'title': p_tag.string}
            profs.append(prof_map)
    profs
    
    
    
    
    Out[58]:
    [{'name': 'Adkison, Abbey ',
      'title': 'Assistant Director, Multi-Media Journalism'},
     {'name': 'Alarcón, Daniel',
      'title': 'Assistant Professor of Broadcast Journalism'},
     {'name': 'Barclay, Dolores ', 'title': 'Adjunct Faculty'},
     {'name': 'Baum, Geraldine', 'title': 'Adjunct Faculty'},
     {'name': 'Bell, Emily',
      'title': 'Professor of Professional Practice & Director, Tow Center for Digital Journalism'},
     {'name': 'Benedict, Helen ', 'title': 'Professor'},
     {'name': 'Bennet, John ', 'title': 'Adjunct Faculty'},
     {'name': 'Bennett, Rob', 'title': 'Adjunct Faculty'},
     {'name': 'Berman, Nina', 'title': 'Associate Professor'},
     {'name': 'Blair, Gwenda ', 'title': 'Adjunct Faculty'},
     {'name': 'Blum, David ', 'title': 'Adjunct Faculty'},
     {'name': 'Bockelman, Matt', 'title': None},
     {'name': 'Bodarky, George', 'title': 'Adjunct Assistant Professor '},
     {'name': 'Bogdanich, Walt ', 'title': 'Adjunct Faculty'},
     {'name': 'Bourin, Lennart', 'title': 'Adjunct Faculty'},
     {'name': 'Bradley, Theresa', 'title': 'Adjunct Faculty'},
     {'name': 'Brainard, Curtis ', 'title': 'Staff Writer'},
     {'name': 'Bruder, Jessica', 'title': 'Adjunct Faculty'},
     {'name': 'Burford, Melanie ', 'title': 'Adjunct Faculty'},
     {'name': 'Burleigh, Nina ', 'title': 'Adjunct Faculty'},
     {'name': 'Cabot, Heather', 'title': 'Adjunct Professor'},
     {'name': 'Cabral, Elena ',
      'title': 'Adjunct Faculty & Assistant Director, Student Services'},
     {'name': 'Canipe, Chris', 'title': None},
     {'name': 'Casciato, Tom', 'title': 'Adjunct Faculty'},
     {'name': 'Cohen, Julie', 'title': 'Adjunct Faculty'},
     {'name': 'Cohen, Lisa R.',
      'title': 'Director, duPont/Professional Prizes; Adjunct Associate Professor'},
     {'name': 'Cohen, Sarah', 'title': 'Adjunct Faculty'},
     {'name': 'Coll, Steve',
      'title': 'Dean & Henry R. Luce Professor of Journalism'},
     {'name': 'Cooper, Ann',
      'title': 'CBS Professor of Professional Practice in International Journalism'},
     {'name': 'Coronel, Sheila ',
      'title': 'Toni Stabile Professor of Professional Practice in Investigative Journalism; Director, Toni Stabile Center for Investigative Journalism, and Dean of Academic Affairs'},
     {'name': 'Coyne , Kevin ', 'title': 'Adjunct Faculty'},
     {'name': 'Cross, June ', 'title': 'Professor '},
     {'name': 'Cunningham, Brent ', 'title': 'Deputy Editor'},
     {'name': 'DePalma, Anthony', 'title': 'Adjunct Faculty'},
     {'name': 'Deitsch, Richard', 'title': 'Adjunct Faculty'},
     {'name': 'Diamond, Becky', 'title': None},
     {'name': 'Dinges, John', 'title': 'Godfrey Lowell Cabot Professor Emeritus'},
     {'name': 'Donahue, Kerry ',
      'title': 'Adjunct Faculty & Director, Radio Program'},
     {'name': 'Drew, Christopher ', 'title': 'Adjunct Faculty'},
     {'name': 'Edsall, Thomas B. ', 'title': None},
     {'name': 'Einhorn, Cheryl', 'title': 'Adjunct Faculty'},
     {'name': 'Elliott, Justin ', 'title': 'Adjunct Assistant Professor'},
     {'name': 'Epstein, Randi Hutter ', 'title': 'Adjunct Faculty '},
     {'name': 'Evans, Farrell ', 'title': 'Adjunct Faculty'},
     {'name': 'Ford, Constance Mitchell ', 'title': 'Adjunct Faculty'},
     {'name': 'Freedman, Samuel ', 'title': 'Professor'},
     {'name': 'Freeman, George ', 'title': 'Adjunct Faculty'},
     {'name': 'French, Howard ', 'title': 'Associate Professor'},
     {'name': 'Fried, Stephen ', 'title': 'Adjunct Faculty'},
     {'name': 'Garcia, Mario',
      'title': 'Senior Adviser on News Design/Adjunct Professor'},
     {'name': 'Gezari, Vanessa', 'title': None},
     {'name': 'Gilderman, Greg', 'title': 'Adjunct Faculty'},
     {'name': 'Gitlin, Todd', 'title': 'Professor & Chair, Ph.D. Program'},
     {'name': 'Giudice, Barbara ', 'title': 'Adjunct Faculty'},
     {'name': 'Goldensohn, Marty', 'title': 'Adjunct Faculty'},
     {'name': 'Goldman, Ari ', 'title': 'Professor'},
     {'name': 'Goldstein, Jacob', 'title': 'Adjunct Professor'},
     {'name': 'Grueskin, Bill', 'title': 'Professor of Professional Practice '},
     {'name': 'Haburchak, Alan', 'title': 'Adjunct Faculty'},
     {'name': 'Hajdu, David ', 'title': 'Associate Professor '},
     {'name': 'Hancock, LynNell',
      'title': 'H. Gordon Garbedian Professor of Journalism & Director, Spencer Fellowship Program'},
     {'name': 'Hansen, Mark',
      'title': 'Director, David and Helen Gurley Brown Institute for Media Innovation & Professor of Journalism '},
     {'name': 'Harris, Mark', 'title': 'Adjunct Faculty'},
     {'name': 'Hartenstein, Julie', 'title': 'Associate Dean'},
     {'name': 'Heinzerling, Larry', 'title': 'Adjunct Faculty'},
     {'name': 'Herman, Tom ', 'title': 'Adjunct Faculty'},
     {'name': 'Hickey, Neil ', 'title': 'Adjunct Faculty'},
     {'name': 'Hoel, Lars ', 'title': None},
     {'name': 'Hogan, Pamela', 'title': 'Adjunct Faculty'},
     {'name': 'Holloway, Marguerite ',
      'title': 'Associate Professor of Professional Practice and Director, Science & Environmental Journalism'},
     {'name': 'Hoyt, Michael ', 'title': 'Adjunct Faculty'},
     {'name': 'Isabel, Lonnie', 'title': 'Senior Lecturer in Discipline'},
     {'name': 'Jennings, Tom', 'title': 'Adjunct Faculty'},
     {'name': 'John, Richard R. ',
      'title': 'Professor of History and Communications'},
     {'name': 'Jones, Matthew L. ', 'title': 'Instructor, The Lede Program'},
     {'name': 'Kann, Peter R. ', 'title': 'Adjunct Faculty'},
     {'name': 'Kantrowitz, Barbara ', 'title': 'Adjunct Professor'},
     {'name': 'Karle, Stuart',
      'title': 'Adjunct Faculty; William J. Brennan Jr. Visiting Professor of First Amendment Issues'},
     {'name': 'Karr, Rick', 'title': 'Adjunct Faculty'},
     {'name': 'Kellogg, David', 'title': 'Adjunct Faculty'},
     {'name': 'Kennedy, Lucy', 'title': 'Adjunct Faculty'},
     {'name': 'Kent, Thomas ', 'title': 'Adjunct Faculty'},
     {'name': 'Klatell, David',
      'title': 'Professor of Professional Practice & Chair, International Studies'},
     {'name': 'Klein, Adam', 'title': 'Adjunct Professor'},
     {'name': 'Kleman, Kim ', 'title': 'Adjunct Faculty'},
     {'name': 'Knee, Jonathan', 'title': 'Adjunct Professor'},
     {'name': 'Konner, Joan', 'title': 'Dean Emerita'},
     {'name': 'Kozar, Matt', 'title': 'Adjunct Faculty'},
     {'name': 'Lehmann-Haupt, Christopher ', 'title': 'Adjunct Faculty'},
     {'name': 'Lemann, Nicholas',
      'title': 'Joseph Pulitzer II and Edith Pulitzer Moore Professor of Journalism; Dean Emeritus'},
     {'name': 'Levenson, Jacob ', 'title': 'Adjunct Faculty'},
     {'name': 'Lipsky, Seth ', 'title': 'Adjunct Faculty'},
     {'name': 'Lombardi, Kristen', 'title': 'Adjunct Faculty'},
     {'name': 'Luhby, Tami', 'title': 'Adjunct Faculty'},
     {'name': 'Maciulis, Tony', 'title': 'Adjunct Faculty'},
     {'name': 'Maharidge, Dale ', 'title': 'Professor '},
     {'name': 'Mason, Tom', 'title': None},
     {'name': 'Matloff, Judith ', 'title': 'Adjunct faculty'},
     {'name': 'Maytal, Itai', 'title': 'Adjunct Faculty'},
     {'name': 'McCormick, David ', 'title': 'Adjunct Faculty'},
     {'name': 'McCray, Melvin', 'title': 'Adjunct Faculty'},
     {'name': 'McDonald, Erica', 'title': None},
     {'name': 'McGregor, Susan E.',
      'title': 'Assistant Professor & Assistant Director, Tow Center for Digital Journalism'},
     {'name': 'Mencher, Melvin', 'title': 'Professor Emeritus'},
     {'name': 'Merchant, Preston', 'title': None},
     {'name': 'Mintz, James', 'title': 'Adjunct Faculty'},
     {'name': 'Morais, Betsy', 'title': 'Adjunct Faculty'},
     {'name': 'Nasar, Sylvia ',
      'title': 'John S. and James L. Knight Professor of Business Journalism'},
     {'name': 'Navasky, Victor',
      'title': 'George T. Delacorte Professor in Magazine Journalism Emeritus'},
     {'name': 'Newman, Maria', 'title': 'Adjunct Faculty'},
     {'name': 'Nisenholtz, Martin', 'title': 'Adjunct Professor '},
     {'name': 'Norton, Rob', 'title': 'Adjunct Faculty'},
     {'name': 'Nosheen, Habiba', 'title': 'Adjunct Professor'},
     {'name': 'Ornstein, Charles', 'title': 'Adjunct Faculty'},
     {'name': 'Padawer , Ruth', 'title': 'Adjunct Professor'},
     {'name': 'Parker, Diantha', 'title': 'Adjunct Faculty'},
     {'name': 'Parrish, Allison ', 'title': 'Instructor, The Lede Program'},
     {'name': 'Patel, Samir S.', 'title': 'Adjunct Faculty'},
     {'name': 'Paunescu, Delia ', 'title': None},
     {'name': 'Perlman, Merrill', 'title': 'Adjunct Faculty'},
     {'name': 'Pollak, Lisa', 'title': 'Adjunct Faculty'},
     {'name': 'Pool-Eckert, Marquita', 'title': 'Adjunct Faculty'},
     {'name': 'Richardson, Lynda ', 'title': 'Adjunct Faculty'},
     {'name': 'Richardson, Whitney ', 'title': 'Adjunct Professor'},
     {'name': 'Richman, Joe', 'title': 'Adjunct Faculty'},
     {'name': 'Robbins, Ed', 'title': 'Adjunct Faculty'},
     {'name': 'Roberts, Fletcher', 'title': 'Adjunct Faculty'},
     {'name': 'Sacha, Bob ', 'title': 'Adjunct Faculty'},
     {'name': 'Sataline, Suzanne', 'title': 'Adjunct Faculty'},
     {'name': 'Schapiro, Rich', 'title': 'Adjunct Faculty'},
     {'name': 'Schatz, Robin', 'title': 'Adjunct Faculty'},
     {'name': 'Schecter, B.J.', 'title': 'Adjunct Faculty'},
     {'name': 'Schellmann, Hilke', 'title': 'Adjunct Faculty'},
     {'name': 'Schoen, John', 'title': 'Adjunct Faculty'},
     {'name': 'Schoonmaker, Mary Ellen', 'title': 'Adjunct Faculty'},
     {'name': 'Schudson, Michael ', 'title': 'Professor '},
     {'name': 'Schumacher-Matos, Ed ', 'title': 'Adjunct Faculty'},
     {'name': 'Schwartz, Jack ', 'title': None},
     {'name': 'Seave, Ava ', 'title': 'Adjunct Faculty'},
     {'name': 'Segnini, Giannina ',
      'title': 'Director of the Data Concentration Program'},
     {'name': 'Shanor, Donald', 'title': 'G. L. Cabot Professor Emeritus '},
     {'name': 'Shapiro, Bruce',
      'title': 'Executive Director, Dart Center for Journalism and Trauma and Senior Advisor for Academic Affairs'},
     {'name': 'Shapiro, Michael ', 'title': 'Professor '},
     {'name': 'Shihab-Eldin, Ahmed ', 'title': 'Adjunct Assistant Professor '},
     {'name': 'Sicha, Choire', 'title': 'Adjunct Professor'},
     {'name': 'Siegel, Lloyd', 'title': 'Adjunct Faculty'},
     {'name': 'Singer, Amy ', 'title': 'Adjunct Faculty'},
     {'name': 'Sliwa, Maria', 'title': 'Adjunct Faculty'},
     {'name': 'Solomon, Alisa',
      'title': 'Professor & Director, Arts Concentration, M.A. Program'},
     {'name': 'Soma, Jonathan', 'title': 'Director, The Lede Program'},
     {'name': 'Sotomayor, Ernest',
      'title': 'Dean of Student Affairs & Director, Latin American Initiatives'},
     {'name': 'Span, Paula ', 'title': 'Adjunct Professor'},
     {'name': 'Spanninger, Martha', 'title': 'Adjunct Faculty'},
     {'name': 'Stabiner, Karen', 'title': 'Adjunct Faculty'},
     {'name': 'Stewart, James',
      'title': 'Bloomberg Professor of Business Journalism'},
     {'name': 'Stille, Alexander',
      'title': 'San Paolo Professor of International Journalism'},
     {'name': 'Subramanian, Sushma', 'title': 'Adjunct Faculty'},
     {'name': 'Surowicz, Simon', 'title': None},
     {'name': 'Templin, Jacob', 'title': None},
     {'name': 'Tenen, Dennis', 'title': 'Instructor, The Lede Program'},
     {'name': 'Topping, Seymour  ',
      'title': 'San Paolo Professor of International Journalism Emeritus'},
     {'name': 'Trivedi, Yogi ', 'title': 'Adjunct Professor'},
     {'name': 'Tsiantar, Dody ', 'title': 'Adjunct Faculty'},
     {'name': 'Tu, Duy Linh',
      'title': 'Associate Professor of Professional Practice & Director, Digital Media Program '},
     {'name': 'Tucher, Andie ',
      'title': 'Associate Professor; Director, Ph.D. Program'},
     {'name': 'Ventura, Michael', 'title': 'Adjunct Faculty'},
     {'name': 'Wald, Jonathan', 'title': 'Adjunt Faculty'},
     {'name': 'Wald, Richard',
      'title': 'Fred W. Friendly Professor of Professional Practice in Media Society Emeritus'},
     {'name': 'Wayne, Leslie', 'title': 'Adjunct Faculty'},
     {'name': 'Weiner, Jonathan ',
      'title': 'Maxwell M. Geffen Professor of Medical and Scientific Journalism '},
     {'name': 'West, Betsy ',
      'title': 'Fred W. Friendly Professor of Professional Practice in Media and Society'},
     {'name': 'Wheatley, Jr., William', 'title': 'Adjunct Faculty'},
     {'name': 'Wiggins, Chris', 'title': 'Instructor, The Lede Program'},
     {'name': 'Wilson, Duff', 'title': 'Adjunct Faculty'},
     {'name': 'Woodward, Tali ', 'title': 'Director, M.A. Program'},
     {'name': 'Wu, Tim',
      'title': 'Director of the Saul and Janice Poliak Center for the Study of First Amendment Issues'},
     {'name': 'Yu, Frederick T C.',
      'title': 'CBS Professor Emeritus International Journalism'},
     {'name': 'Zucker, John', 'title': 'Adjunct Faculty'},
     {'name': 'Zuckerman, Jocelyn Craugh ', 'title': 'Adjunct Faculty'}]
    
    
    In [59]:
    for item in profs:
        print(item['name'])
    
    
    
    
    Adkison, Abbey 
    Alarcón, Daniel
    Barclay, Dolores 
    Baum, Geraldine
    Bell, Emily
    Benedict, Helen 
    Bennet, John 
    Bennett, Rob
    Berman, Nina
    Blair, Gwenda 
    Blum, David 
    Bockelman, Matt
    Bodarky, George
    Bogdanich, Walt 
    Bourin, Lennart
    Bradley, Theresa
    Brainard, Curtis 
    Bruder, Jessica
    Burford, Melanie 
    Burleigh, Nina 
    Cabot, Heather
    Cabral, Elena 
    Canipe, Chris
    Casciato, Tom
    Cohen, Julie
    Cohen, Lisa R.
    Cohen, Sarah
    Coll, Steve
    Cooper, Ann
    Coronel, Sheila 
    Coyne , Kevin 
    Cross, June 
    Cunningham, Brent 
    DePalma, Anthony
    Deitsch, Richard
    Diamond, Becky
    Dinges, John
    Donahue, Kerry 
    Drew, Christopher 
    Edsall, Thomas B. 
    Einhorn, Cheryl
    Elliott, Justin 
    Epstein, Randi Hutter 
    Evans, Farrell 
    Ford, Constance Mitchell 
    Freedman, Samuel 
    Freeman, George 
    French, Howard 
    Fried, Stephen 
    Garcia, Mario
    Gezari, Vanessa
    Gilderman, Greg
    Gitlin, Todd
    Giudice, Barbara 
    Goldensohn, Marty
    Goldman, Ari 
    Goldstein, Jacob
    Grueskin, Bill
    Haburchak, Alan
    Hajdu, David 
    Hancock, LynNell
    Hansen, Mark
    Harris, Mark
    Hartenstein, Julie
    Heinzerling, Larry
    Herman, Tom 
    Hickey, Neil 
    Hoel, Lars 
    Hogan, Pamela
    Holloway, Marguerite 
    Hoyt, Michael 
    Isabel, Lonnie
    Jennings, Tom
    John, Richard R. 
    Jones, Matthew L. 
    Kann, Peter R. 
    Kantrowitz, Barbara 
    Karle, Stuart
    Karr, Rick
    Kellogg, David
    Kennedy, Lucy
    Kent, Thomas 
    Klatell, David
    Klein, Adam
    Kleman, Kim 
    Knee, Jonathan
    Konner, Joan
    Kozar, Matt
    Lehmann-Haupt, Christopher 
    Lemann, Nicholas
    Levenson, Jacob 
    Lipsky, Seth 
    Lombardi, Kristen
    Luhby, Tami
    Maciulis, Tony
    Maharidge, Dale 
    Mason, Tom
    Matloff, Judith 
    Maytal, Itai
    McCormick, David 
    McCray, Melvin
    McDonald, Erica
    McGregor, Susan E.
    Mencher, Melvin
    Merchant, Preston
    Mintz, James
    Morais, Betsy
    Nasar, Sylvia 
    Navasky, Victor
    Newman, Maria
    Nisenholtz, Martin
    Norton, Rob
    Nosheen, Habiba
    Ornstein, Charles
    Padawer , Ruth
    Parker, Diantha
    Parrish, Allison 
    Patel, Samir S.
    Paunescu, Delia 
    Perlman, Merrill
    Pollak, Lisa
    Pool-Eckert, Marquita
    Richardson, Lynda 
    Richardson, Whitney 
    Richman, Joe
    Robbins, Ed
    Roberts, Fletcher
    Sacha, Bob 
    Sataline, Suzanne
    Schapiro, Rich
    Schatz, Robin
    Schecter, B.J.
    Schellmann, Hilke
    Schoen, John
    Schoonmaker, Mary Ellen
    Schudson, Michael 
    Schumacher-Matos, Ed 
    Schwartz, Jack 
    Seave, Ava 
    Segnini, Giannina 
    Shanor, Donald
    Shapiro, Bruce
    Shapiro, Michael 
    Shihab-Eldin, Ahmed 
    Sicha, Choire
    Siegel, Lloyd
    Singer, Amy 
    Sliwa, Maria
    Solomon, Alisa
    Soma, Jonathan
    Sotomayor, Ernest
    Span, Paula 
    Spanninger, Martha
    Stabiner, Karen
    Stewart, James
    Stille, Alexander
    Subramanian, Sushma
    Surowicz, Simon
    Templin, Jacob
    Tenen, Dennis
    Topping, Seymour  
    Trivedi, Yogi 
    Tsiantar, Dody 
    Tu, Duy Linh
    Tucher, Andie 
    Ventura, Michael
    Wald, Jonathan
    Wald, Richard
    Wayne, Leslie
    Weiner, Jonathan 
    West, Betsy 
    Wheatley, Jr., William
    Wiggins, Chris
    Wilson, Duff
    Woodward, Tali 
    Wu, Tim
    Yu, Frederick T C.
    Zucker, John
    Zuckerman, Jocelyn Craugh 
    

    String Indexing

    print all of the professors whose last name start with M

    
    
    In [61]:
    # print all of the professors whose last name start with 'M'
    m_profs = []
    mcount = 0
    for item in profs:
        prof_name = item['name']
        if prof_name[0]=='M':
            print(item['name'])
            mcount += 1 #mcount= mcount+1
            
    print(mcount)
    
    
    
    
    Maciulis, Tony
    Maharidge, Dale 
    Mason, Tom
    Matloff, Judith 
    Maytal, Itai
    McCormick, David 
    McCray, Melvin
    McDonald, Erica
    McGregor, Susan E.
    Mencher, Melvin
    Merchant, Preston
    Mintz, James
    Morais, Betsy
    13
    
    
    
    In [65]:
    # find all of the professors listed as "Adjunct Faculty"
    adjunct_profs = []
    #same as where clause
    mcount=0
    for item in profs:
        if item['title'] is not None and ("Adjunct" in item['title']):
            adjunct_profs.append(item)
    len(adjunct_profs)
    
    
    
    
    Out[65]:
    110
    
    
    In [ ]:
    for item in profs:
        if item['title'] is not None and ("Adjunct" in item['title']: adjunct_profs.append(item)
    
    
    
    In [64]:
    message = "bungalow"
    message[0]
    
    
    
    
    Out[64]:
    'b'
    
    
    In [65]:
    message[2:6]
    
    
    
    
    Out[65]:
    'ngal'
    
    
    In [66]:
    message[-1]
    
    
    
    
    Out[66]:
    'w'
    
    
    In [67]:
    message[0:3]
    
    
    
    
    Out[67]:
    'bun'
    
    
    In [68]:
    message[:3]
    
    
    
    
    Out[68]:
    'bun'
    
    
    In [69]:
    message[4:]
    
    
    
    
    Out[69]:
    'alow'
    
    
    In [70]:
    message[-5:-2]
    
    
    
    
    Out[70]:
    'gal'

    lost count of asides

    
    
    In [74]:
    x=5
    
    
    
    In [75]:
    x
    
    
    
    
    Out[75]:
    5
    
    
    In [76]:
    x = x-1
    
    
    
    In [77]:
    x
    
    
    
    
    Out[77]:
    4
    
    
    In [78]:
    x -= 1
    
    
    
    In [79]:
    x
    
    
    
    
    Out[79]:
    3
    
    
    In [80]:
    x *=2
    
    
    
    In [81]:
    x
    
    
    
    
    Out[81]:
    6
    
    
    In [ ]: