Scrapin' the Webz



In [4]:

    
!pip3 install bs4









    



Requirement already satisfied (use --upgrade to upgrade): bs4 in /usr/local/lib/python3.5/site-packages
Requirement already satisfied (use --upgrade to upgrade): beautifulsoup4 in /usr/local/lib/python3.5/site-packages (from bs4)



In [5]:

    
from bs4 import BeautifulSoup



In [6]:

    
from urllib.request import urlopen
html_str = urlopen("http://static.decontextualize.com/kittens.html").read()



In [7]:

    
print(html_str)









    



b'<!doctype html>\n<html>\n\t<head>\n\t\t<title>Kittens!</title>\n\t\t<style type="text/css">\n\t\t\tspan.lastcheckup { font-family: "Courier", fixed; font-size: 11px; }\n\t\t</style>\n\t</head>\n\t<body>\n\t\t<h1>Kittens and the TV Shows They Love</h1>\n\t\t<div class="kitten">\n\t\t\t<h2>Fluffy</h2>\n\t\t\t<div><img src="http://placekitten.com/120/120"></div>\n\t\t\t<ul class="tvshows">\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0106145/">Deep Space Nine</a>\n\t\t\t\t</li>\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0088576/">Mr. Belvedere</a>\n\t\t\t\t</li>\n\t\t\t</ul>\n\t\t\tLast check-up: <span class="lastcheckup">2014-01-17</span>\n\t\t</div>\n\t\t<div class="kitten">\n\t\t\t<h2>Monsieur Whiskeurs</h2>\n\t\t\t<div><img src="http://placekitten.com/110/110"></div>\n\t\t\t<ul class="tvshows">\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0106179/">The X-Files</a>\n\t\t\t\t</li>\n\t\t\t\t<li>\n\t\t\t\t\t<a href="http://www.imdb.com/title/tt0098800/">Fresh Prince</a>\n\t\t\t\t</li>\n\t\t\t</ul>\n\t\t\tLast check-up: <span class="lastcheckup">2013-11-02</span>\n\t\t</div>\n\t</body>\n</html>\n\n'



In [8]:

    
document = BeautifulSoup(html_str,"html.parser")



In [9]:

    
type(document)









    Out[9]:





bs4.BeautifulSoup



In [10]:

    
h1_tag = document.find('h1')



In [11]:

    
h1_tag.string









    Out[11]:





'Kittens and the TV Shows They Love'



In [12]:

    
img_tag = document.find('img')



In [13]:

    
img_tag.string



In [14]:

    
img_tag('src')









    Out[14]:





[]



In [15]:

    
img_tag['src']









    Out[15]:





'http://placekitten.com/120/120'



In [16]:

    
document.find_all('img')









    Out[16]:





[<img src="http://placekitten.com/120/120"/>,
 <img src="http://placekitten.com/110/110"/>]



In [17]:

    
img_tags=document.find_all('img')



In [18]:

    
type(img_tags)









    Out[18]:





bs4.element.ResultSet



In [19]:

    
first_img = img_tags[0]



In [20]:

    
first_img['src']









    Out[20]:





'http://placekitten.com/120/120'



In [21]:

    
second_img = img_tags[1]



In [22]:

    
second_img['src']









    Out[22]:





'http://placekitten.com/110/110'



In [23]:

    
for item in img_tags:
    print(item['src'])









    



http://placekitten.com/120/120
http://placekitten.com/110/110



In [24]:

    
h2_tags = document.find_all('h2')
for item in h2_tags:
    print(item.string)









    



Fluffy
Monsieur Whiskeurs



In [25]:

    
checkups = document.find_all('span',{'class':'lastcheckup'})
for item in checkups:
    print(item.string)



In [26]:

    
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    checkup = item.find('span')
    print(checkup.string)









    



Fluffy
2014-01-17
Monsieur Whiskeurs
2013-11-02



In [27]:

    
kittens = document.find_all('div', {'class': 'kitten'})



In [28]:

    
first_kitten = kittens[0]
first_kitten_h2 = first_kitten.find('h2')
print(first_kitten_h2.string)









    



Fluffy



In [29]:

    
planets = ["Mercury", "Venus", "Earth", "Mars", "Jupiter", "Saturn", "Uranus", "Neptune"]



In [30]:

    
separator = ","



In [31]:

    
separator.join(planets)









    Out[31]:





'Mercury,Venus,Earth,Mars,Jupiter,Saturn,Uranus,Neptune'

But first, an aside about joining strings



In [32]:

    
print("&\n".join(planets))









    



Mercury&
Venus&
Earth&
Mars&
Jupiter&
Saturn&
Uranus&
Neptune



In [33]:

    
print("&\n".join(planets[:4]))









    



Mercury&
Venus&
Earth&
Mars



In [34]:

    
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    a_tags = item.find_all('a') #anchor tag, ancestor
    all_shows_str = [] #create a new list
    for a_tag_item in a_tags:
        #print("-", a_tag_item.string)
        tag_str = a_tag_item.string
        all_shows_str.append(tag_str)
        string_with_all_show_names = ",".join(all_shows_str)  
        print(h2_tag.string+ ":", string_with_all_show_names)









    



Fluffy
Fluffy: Deep Space Nine
Fluffy: Deep Space Nine,Mr. Belvedere
Monsieur Whiskeurs
Monsieur Whiskeurs: The X-Files
Monsieur Whiskeurs: The X-Files,Fresh Prince



In [35]:

    
kittens_data = list()#create an empty list
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    a_tags = item.find_all('a') #anchor tag, ancestor
    all_shows_str = [] #create a new list
    for a_tag_item in a_tags:
        #print("-", a_tag_item.string)
        tag_str = a_tag_item.string
        all_shows_str.append(tag_str)
        #1 create a dictionary and add to it the relevant key/value pairs
        #kitten_map = {}
        #kitten_map["name"] = h2_tag.string
        #kitten_map["tvshows"] = all_shows_str
        kitten_map = {"name":h2_tag.string, "tvshows":all_shows_str }
        #2 append that dictionary to the kittens_data
        string_with_all_show_names = ",".join(all_shows_str)  
        #print(h2_tag.string+ ":", string_with_all_show_names
kittens_data









    



Fluffy
Monsieur Whiskeurs






    Out[35]:





[]



In [36]:

    
kittens_data = list()#create an empty list
kittens = document.find_all('div', {'class': 'kitten'})
for item in kittens:
    h2_tag = item.find('h2')
    print(h2_tag.string)
    a_tags = item.find_all('a') #anchor tag, ancestor
    all_shows_str = []
    for a_tag_item in a_tags:
        tag_str = a_tag_item.string
        all_shows_str.append(tag_str)
    #create a dictionary adding kittens checkups
    checkup = item.find('span')# get the string with checkup.string
    kittens_data.append(
              {"name":h2_tag.string, 
                        "tvshows":all_shows_str,
                        "last_checkup": checkup.string})
kittens_data









    



Fluffy
Monsieur Whiskeurs






    Out[36]:





[{'last_checkup': '2014-01-17',
  'name': 'Fluffy',
  'tvshows': ['Deep Space Nine', 'Mr. Belvedere']},
 {'last_checkup': '2013-11-02',
  'name': 'Monsieur Whiskeurs',
  'tvshows': ['The X-Files', 'Fresh Prince']}]

Another Aside: lists and ...lists



In [37]:

    
Our next goal is to create a data structure that looks like this: 
    [
        {'name': 'Fluffy',
          'tv shows': ['Deep Space Nine', 'Mr.Belvedere']},
        {}









    



  File "<ipython-input-37-50d1845d425b>", line 1
    Our next goal is to create a data structure that looks like this:
           ^
SyntaxError: invalid syntax



In [38]:

    
x = ["a", "b", "c", "d"]



In [39]:

    
x[0]









    Out[39]:





'a'



In [40]:

    
x.append("e")



In [41]:

    
len(x)









    Out[41]:





5



In [42]:

    
x[4]









    Out[42]:





'e'



In [43]:

    
numbers = [1,2,3,4,5,6]
# end up with: [1,4,9,16,25,36]



In [44]:

    
squared = [item * item for item in numbers]
for item in numbers:
    s = item*item
    squared.append(s)



In [45]:

    
squared









    Out[45]:





[1, 4, 9, 16, 25, 36, 1, 4, 9, 16, 25, 36]



In [46]:

    
## Aside the Third: Making dictionaries
#declaring a dictionary
x = {'a':1, 'b':2, 'c':3}



In [47]:

    
#get a value out of a dictionary
x['a']









    Out[47]:





1



In [48]:

    
x.keys()









    Out[48]:





dict_keys(['b', 'c', 'a'])



In [49]:

    
for key in x.keys():
    print(key) #print out keys









    



b
c
a



In [50]:

    
# target: {1:1, 2:4, 3:9, 4:16, 5:25,...}
squares = {}
for n in range(1,11):
    squares[n] = n*n
squares









    Out[50]:





{1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81, 10: 100}



In [51]:

    
squares[7]









    Out[51]:





49



In [52]:

    
names = ["Aaron", "Bob", "Caroline", "Daphne"]
#target: {"Aaron": 5} #show the name and how many characters each name has
name_length_map = {}#map is 
for item in names:
    name_length_map[item] = len(item)
name_length_map #evaluate the dictionary, Python 3 # take a list and create a new dictionary









    Out[52]:





{'Aaron': 5, 'Bob': 3, 'Caroline': 8, 'Daphne': 6}

Scraping the Faculty, how many percentage of the CJ faculty are adjunct faculty

our hypothesis: find all the

tags

inside the

tags, find the

inside the
, the name is the content of an
inside the
tags, find the
with class description ** the title of the professor is the content of that tag.



In [53]:

    
from urllib.request import urlopen
faculty_html = urlopen("http://www.journalism.columbia.edu/page/10/10?category_ids%5B%5D=2&category_ids%5B%5D=3&category_ids%5B%5D=37").read()



In [54]:

    
document = BeautifulSoup(faculty_html, "html.parser")



In [55]:

    
document.find('h2').string









    Out[55]:





' Full-Time, Adjunct & Visiting Faculty'



In [56]:

    
h2_tag = document.find('h2')
h2_tag.string









    Out[56]:





' Full-Time, Adjunct & Visiting Faculty'

very first task: print out the names of all the faculty members.



In [57]:

    
# this doesn't work, 
ul_tag = document.find('ul', {'class': 'experts-list'})
li_tags= ul_tag.find_all('li')
for item in li_tags:
    h4_tag = item.find('h4')
    if h4_tag: #none counts as false in python, only proceed if we actually found a h4-tag under li tags
        a_tag = h4_tag.find('a')#name of adjunct
        p_tag = item.find('p', {'class':'description'})#position of adjunct
        print(a_tag.string, "/", p_tag.string)









    



Adkison, Abbey  / Assistant Director, Multi-Media Journalism
Alarcón, Daniel / Assistant Professor of Broadcast Journalism
Barclay, Dolores  / Adjunct Faculty
Baum, Geraldine / Adjunct Faculty
Bell, Emily / Professor of Professional Practice & Director, Tow Center for Digital Journalism
Benedict, Helen  / Professor
Bennet, John  / Adjunct Faculty
Bennett, Rob / Adjunct Faculty
Berman, Nina / Associate Professor
Blair, Gwenda  / Adjunct Faculty
Blum, David  / Adjunct Faculty
Bockelman, Matt / None
Bodarky, George / Adjunct Assistant Professor 
Bogdanich, Walt  / Adjunct Faculty
Bourin, Lennart / Adjunct Faculty
Bradley, Theresa / Adjunct Faculty
Brainard, Curtis  / Staff Writer
Bruder, Jessica / Adjunct Faculty
Burford, Melanie  / Adjunct Faculty
Burleigh, Nina  / Adjunct Faculty
Cabot, Heather / Adjunct Professor
Cabral, Elena  / Adjunct Faculty & Assistant Director, Student Services
Canipe, Chris / None
Casciato, Tom / Adjunct Faculty
Cohen, Julie / Adjunct Faculty
Cohen, Lisa R. / Director, duPont/Professional Prizes; Adjunct Associate Professor
Cohen, Sarah / Adjunct Faculty
Coll, Steve / Dean & Henry R. Luce Professor of Journalism
Cooper, Ann / CBS Professor of Professional Practice in International Journalism
Coronel, Sheila  / Toni Stabile Professor of Professional Practice in Investigative Journalism; Director, Toni Stabile Center for Investigative Journalism, and Dean of Academic Affairs
Coyne , Kevin  / Adjunct Faculty
Cross, June  / Professor 
Cunningham, Brent  / Deputy Editor
DePalma, Anthony / Adjunct Faculty
Deitsch, Richard / Adjunct Faculty
Diamond, Becky / None
Dinges, John / Godfrey Lowell Cabot Professor Emeritus
Donahue, Kerry  / Adjunct Faculty & Director, Radio Program
Drew, Christopher  / Adjunct Faculty
Edsall, Thomas B.  / None
Einhorn, Cheryl / Adjunct Faculty
Elliott, Justin  / Adjunct Assistant Professor
Epstein, Randi Hutter  / Adjunct Faculty 
Evans, Farrell  / Adjunct Faculty
Ford, Constance Mitchell  / Adjunct Faculty
Freedman, Samuel  / Professor
Freeman, George  / Adjunct Faculty
French, Howard  / Associate Professor
Fried, Stephen  / Adjunct Faculty
Garcia, Mario / Senior Adviser on News Design/Adjunct Professor
Gezari, Vanessa / None
Gilderman, Greg / Adjunct Faculty
Gitlin, Todd / Professor & Chair, Ph.D. Program
Giudice, Barbara  / Adjunct Faculty
Goldensohn, Marty / Adjunct Faculty
Goldman, Ari  / Professor
Goldstein, Jacob / Adjunct Professor
Grueskin, Bill / Professor of Professional Practice 
Haburchak, Alan / Adjunct Faculty
Hajdu, David  / Associate Professor 
Hancock, LynNell / H. Gordon Garbedian Professor of Journalism & Director, Spencer Fellowship Program
Hansen, Mark / Director, David and Helen Gurley Brown Institute for Media Innovation & Professor of Journalism 
Harris, Mark / Adjunct Faculty
Hartenstein, Julie / Associate Dean
Heinzerling, Larry / Adjunct Faculty
Herman, Tom  / Adjunct Faculty
Hickey, Neil  / Adjunct Faculty
Hoel, Lars  / None
Hogan, Pamela / Adjunct Faculty
Holloway, Marguerite  / Associate Professor of Professional Practice and Director, Science & Environmental Journalism
Hoyt, Michael  / Adjunct Faculty
Isabel, Lonnie / Senior Lecturer in Discipline
Jennings, Tom / Adjunct Faculty
John, Richard R.  / Professor of History and Communications
Jones, Matthew L.  / Instructor, The Lede Program
Kann, Peter R.  / Adjunct Faculty
Kantrowitz, Barbara  / Adjunct Professor
Karle, Stuart / Adjunct Faculty; William J. Brennan Jr. Visiting Professor of First Amendment Issues
Karr, Rick / Adjunct Faculty
Kellogg, David / Adjunct Faculty
Kennedy, Lucy / Adjunct Faculty
Kent, Thomas  / Adjunct Faculty
Klatell, David / Professor of Professional Practice & Chair, International Studies
Klein, Adam / Adjunct Professor
Kleman, Kim  / Adjunct Faculty
Knee, Jonathan / Adjunct Professor
Konner, Joan / Dean Emerita
Kozar, Matt / Adjunct Faculty
Lehmann-Haupt, Christopher  / Adjunct Faculty
Lemann, Nicholas / Joseph Pulitzer II and Edith Pulitzer Moore Professor of Journalism; Dean Emeritus
Levenson, Jacob  / Adjunct Faculty
Lipsky, Seth  / Adjunct Faculty
Lombardi, Kristen / Adjunct Faculty
Luhby, Tami / Adjunct Faculty
Maciulis, Tony / Adjunct Faculty
Maharidge, Dale  / Professor 
Mason, Tom / None
Matloff, Judith  / Adjunct faculty
Maytal, Itai / Adjunct Faculty
McCormick, David  / Adjunct Faculty
McCray, Melvin / Adjunct Faculty
McDonald, Erica / None
McGregor, Susan E. / Assistant Professor & Assistant Director, Tow Center for Digital Journalism
Mencher, Melvin / Professor Emeritus
Merchant, Preston / None
Mintz, James / Adjunct Faculty
Morais, Betsy / Adjunct Faculty
Nasar, Sylvia  / John S. and James L. Knight Professor of Business Journalism
Navasky, Victor / George T. Delacorte Professor in Magazine Journalism Emeritus
Newman, Maria / Adjunct Faculty
Nisenholtz, Martin / Adjunct Professor 
Norton, Rob / Adjunct Faculty
Nosheen, Habiba / Adjunct Professor
Ornstein, Charles / Adjunct Faculty
Padawer , Ruth / Adjunct Professor
Parker, Diantha / Adjunct Faculty
Parrish, Allison  / Instructor, The Lede Program
Patel, Samir S. / Adjunct Faculty
Paunescu, Delia  / None
Perlman, Merrill / Adjunct Faculty
Pollak, Lisa / Adjunct Faculty
Pool-Eckert, Marquita / Adjunct Faculty
Richardson, Lynda  / Adjunct Faculty
Richardson, Whitney  / Adjunct Professor
Richman, Joe / Adjunct Faculty
Robbins, Ed / Adjunct Faculty
Roberts, Fletcher / Adjunct Faculty
Sacha, Bob  / Adjunct Faculty
Sataline, Suzanne / Adjunct Faculty
Schapiro, Rich / Adjunct Faculty
Schatz, Robin / Adjunct Faculty
Schecter, B.J. / Adjunct Faculty
Schellmann, Hilke / Adjunct Faculty
Schoen, John / Adjunct Faculty
Schoonmaker, Mary Ellen / Adjunct Faculty
Schudson, Michael  / Professor 
Schumacher-Matos, Ed  / Adjunct Faculty
Schwartz, Jack  / None
Seave, Ava  / Adjunct Faculty
Segnini, Giannina  / Director of the Data Concentration Program
Shanor, Donald / G. L. Cabot Professor Emeritus 
Shapiro, Bruce / Executive Director, Dart Center for Journalism and Trauma and Senior Advisor for Academic Affairs
Shapiro, Michael  / Professor 
Shihab-Eldin, Ahmed  / Adjunct Assistant Professor 
Sicha, Choire / Adjunct Professor
Siegel, Lloyd / Adjunct Faculty
Singer, Amy  / Adjunct Faculty
Sliwa, Maria / Adjunct Faculty
Solomon, Alisa / Professor & Director, Arts Concentration, M.A. Program
Soma, Jonathan / Director, The Lede Program
Sotomayor, Ernest / Dean of Student Affairs & Director, Latin American Initiatives
Span, Paula  / Adjunct Professor
Spanninger, Martha / Adjunct Faculty
Stabiner, Karen / Adjunct Faculty
Stewart, James / Bloomberg Professor of Business Journalism
Stille, Alexander / San Paolo Professor of International Journalism
Subramanian, Sushma / Adjunct Faculty
Surowicz, Simon / None
Templin, Jacob / None
Tenen, Dennis / Instructor, The Lede Program
Topping, Seymour   / San Paolo Professor of International Journalism Emeritus
Trivedi, Yogi  / Adjunct Professor
Tsiantar, Dody  / Adjunct Faculty
Tu, Duy Linh / Associate Professor of Professional Practice & Director, Digital Media Program 
Tucher, Andie  / Associate Professor; Director, Ph.D. Program
Ventura, Michael / Adjunct Faculty
Wald, Jonathan / Adjunt Faculty
Wald, Richard / Fred W. Friendly Professor of Professional Practice in Media Society Emeritus
Wayne, Leslie / Adjunct Faculty
Weiner, Jonathan  / Maxwell M. Geffen Professor of Medical and Scientific Journalism 
West, Betsy  / Fred W. Friendly Professor of Professional Practice in Media and Society
Wheatley, Jr., William / Adjunct Faculty
Wiggins, Chris / Instructor, The Lede Program
Wilson, Duff / Adjunct Faculty
Woodward, Tali  / Director, M.A. Program
Wu, Tim / Director of the Saul and Janice Poliak Center for the Study of First Amendment Issues
Yu, Frederick T C. / CBS Professor Emeritus International Journalism
Zucker, John / Adjunct Faculty
Zuckerman, Jocelyn Craugh  / Adjunct Faculty

Now, we want to make a list of dictionaries of faculty members along with their titles [{'name': 'Bodarky George', 'title': 'Adjunct Assistant Professor '}, {'name':''}]



In [58]:

    
profs = []
ul_tag = document.find('ul', {'class': 'experts-list'})
li_tags= ul_tag.find_all('li')
for item in li_tags:
    h4_tag = item.find('h4')
    if h4_tag: #none counts as false in python, only proceed if we actually found a h4-tag under li tags
        a_tag = h4_tag.find('a')
        p_tag = item.find('p', {'class':'description'})
        prof_map = {'name': a_tag.string, 'title': p_tag.string}
        profs.append(prof_map)
profs









    Out[58]:





[{'name': 'Adkison, Abbey ',
  'title': 'Assistant Director, Multi-Media Journalism'},
 {'name': 'Alarcón, Daniel',
  'title': 'Assistant Professor of Broadcast Journalism'},
 {'name': 'Barclay, Dolores ', 'title': 'Adjunct Faculty'},
 {'name': 'Baum, Geraldine', 'title': 'Adjunct Faculty'},
 {'name': 'Bell, Emily',
  'title': 'Professor of Professional Practice & Director, Tow Center for Digital Journalism'},
 {'name': 'Benedict, Helen ', 'title': 'Professor'},
 {'name': 'Bennet, John ', 'title': 'Adjunct Faculty'},
 {'name': 'Bennett, Rob', 'title': 'Adjunct Faculty'},
 {'name': 'Berman, Nina', 'title': 'Associate Professor'},
 {'name': 'Blair, Gwenda ', 'title': 'Adjunct Faculty'},
 {'name': 'Blum, David ', 'title': 'Adjunct Faculty'},
 {'name': 'Bockelman, Matt', 'title': None},
 {'name': 'Bodarky, George', 'title': 'Adjunct Assistant Professor '},
 {'name': 'Bogdanich, Walt ', 'title': 'Adjunct Faculty'},
 {'name': 'Bourin, Lennart', 'title': 'Adjunct Faculty'},
 {'name': 'Bradley, Theresa', 'title': 'Adjunct Faculty'},
 {'name': 'Brainard, Curtis ', 'title': 'Staff Writer'},
 {'name': 'Bruder, Jessica', 'title': 'Adjunct Faculty'},
 {'name': 'Burford, Melanie ', 'title': 'Adjunct Faculty'},
 {'name': 'Burleigh, Nina ', 'title': 'Adjunct Faculty'},
 {'name': 'Cabot, Heather', 'title': 'Adjunct Professor'},
 {'name': 'Cabral, Elena ',
  'title': 'Adjunct Faculty & Assistant Director, Student Services'},
 {'name': 'Canipe, Chris', 'title': None},
 {'name': 'Casciato, Tom', 'title': 'Adjunct Faculty'},
 {'name': 'Cohen, Julie', 'title': 'Adjunct Faculty'},
 {'name': 'Cohen, Lisa R.',
  'title': 'Director, duPont/Professional Prizes; Adjunct Associate Professor'},
 {'name': 'Cohen, Sarah', 'title': 'Adjunct Faculty'},
 {'name': 'Coll, Steve',
  'title': 'Dean & Henry R. Luce Professor of Journalism'},
 {'name': 'Cooper, Ann',
  'title': 'CBS Professor of Professional Practice in International Journalism'},
 {'name': 'Coronel, Sheila ',
  'title': 'Toni Stabile Professor of Professional Practice in Investigative Journalism; Director, Toni Stabile Center for Investigative Journalism, and Dean of Academic Affairs'},
 {'name': 'Coyne , Kevin ', 'title': 'Adjunct Faculty'},
 {'name': 'Cross, June ', 'title': 'Professor '},
 {'name': 'Cunningham, Brent ', 'title': 'Deputy Editor'},
 {'name': 'DePalma, Anthony', 'title': 'Adjunct Faculty'},
 {'name': 'Deitsch, Richard', 'title': 'Adjunct Faculty'},
 {'name': 'Diamond, Becky', 'title': None},
 {'name': 'Dinges, John', 'title': 'Godfrey Lowell Cabot Professor Emeritus'},
 {'name': 'Donahue, Kerry ',
  'title': 'Adjunct Faculty & Director, Radio Program'},
 {'name': 'Drew, Christopher ', 'title': 'Adjunct Faculty'},
 {'name': 'Edsall, Thomas B. ', 'title': None},
 {'name': 'Einhorn, Cheryl', 'title': 'Adjunct Faculty'},
 {'name': 'Elliott, Justin ', 'title': 'Adjunct Assistant Professor'},
 {'name': 'Epstein, Randi Hutter ', 'title': 'Adjunct Faculty '},
 {'name': 'Evans, Farrell ', 'title': 'Adjunct Faculty'},
 {'name': 'Ford, Constance Mitchell ', 'title': 'Adjunct Faculty'},
 {'name': 'Freedman, Samuel ', 'title': 'Professor'},
 {'name': 'Freeman, George ', 'title': 'Adjunct Faculty'},
 {'name': 'French, Howard ', 'title': 'Associate Professor'},
 {'name': 'Fried, Stephen ', 'title': 'Adjunct Faculty'},
 {'name': 'Garcia, Mario',
  'title': 'Senior Adviser on News Design/Adjunct Professor'},
 {'name': 'Gezari, Vanessa', 'title': None},
 {'name': 'Gilderman, Greg', 'title': 'Adjunct Faculty'},
 {'name': 'Gitlin, Todd', 'title': 'Professor & Chair, Ph.D. Program'},
 {'name': 'Giudice, Barbara ', 'title': 'Adjunct Faculty'},
 {'name': 'Goldensohn, Marty', 'title': 'Adjunct Faculty'},
 {'name': 'Goldman, Ari ', 'title': 'Professor'},
 {'name': 'Goldstein, Jacob', 'title': 'Adjunct Professor'},
 {'name': 'Grueskin, Bill', 'title': 'Professor of Professional Practice '},
 {'name': 'Haburchak, Alan', 'title': 'Adjunct Faculty'},
 {'name': 'Hajdu, David ', 'title': 'Associate Professor '},
 {'name': 'Hancock, LynNell',
  'title': 'H. Gordon Garbedian Professor of Journalism & Director, Spencer Fellowship Program'},
 {'name': 'Hansen, Mark',
  'title': 'Director, David and Helen Gurley Brown Institute for Media Innovation & Professor of Journalism '},
 {'name': 'Harris, Mark', 'title': 'Adjunct Faculty'},
 {'name': 'Hartenstein, Julie', 'title': 'Associate Dean'},
 {'name': 'Heinzerling, Larry', 'title': 'Adjunct Faculty'},
 {'name': 'Herman, Tom ', 'title': 'Adjunct Faculty'},
 {'name': 'Hickey, Neil ', 'title': 'Adjunct Faculty'},
 {'name': 'Hoel, Lars ', 'title': None},
 {'name': 'Hogan, Pamela', 'title': 'Adjunct Faculty'},
 {'name': 'Holloway, Marguerite ',
  'title': 'Associate Professor of Professional Practice and Director, Science & Environmental Journalism'},
 {'name': 'Hoyt, Michael ', 'title': 'Adjunct Faculty'},
 {'name': 'Isabel, Lonnie', 'title': 'Senior Lecturer in Discipline'},
 {'name': 'Jennings, Tom', 'title': 'Adjunct Faculty'},
 {'name': 'John, Richard R. ',
  'title': 'Professor of History and Communications'},
 {'name': 'Jones, Matthew L. ', 'title': 'Instructor, The Lede Program'},
 {'name': 'Kann, Peter R. ', 'title': 'Adjunct Faculty'},
 {'name': 'Kantrowitz, Barbara ', 'title': 'Adjunct Professor'},
 {'name': 'Karle, Stuart',
  'title': 'Adjunct Faculty; William J. Brennan Jr. Visiting Professor of First Amendment Issues'},
 {'name': 'Karr, Rick', 'title': 'Adjunct Faculty'},
 {'name': 'Kellogg, David', 'title': 'Adjunct Faculty'},
 {'name': 'Kennedy, Lucy', 'title': 'Adjunct Faculty'},
 {'name': 'Kent, Thomas ', 'title': 'Adjunct Faculty'},
 {'name': 'Klatell, David',
  'title': 'Professor of Professional Practice & Chair, International Studies'},
 {'name': 'Klein, Adam', 'title': 'Adjunct Professor'},
 {'name': 'Kleman, Kim ', 'title': 'Adjunct Faculty'},
 {'name': 'Knee, Jonathan', 'title': 'Adjunct Professor'},
 {'name': 'Konner, Joan', 'title': 'Dean Emerita'},
 {'name': 'Kozar, Matt', 'title': 'Adjunct Faculty'},
 {'name': 'Lehmann-Haupt, Christopher ', 'title': 'Adjunct Faculty'},
 {'name': 'Lemann, Nicholas',
  'title': 'Joseph Pulitzer II and Edith Pulitzer Moore Professor of Journalism; Dean Emeritus'},
 {'name': 'Levenson, Jacob ', 'title': 'Adjunct Faculty'},
 {'name': 'Lipsky, Seth ', 'title': 'Adjunct Faculty'},
 {'name': 'Lombardi, Kristen', 'title': 'Adjunct Faculty'},
 {'name': 'Luhby, Tami', 'title': 'Adjunct Faculty'},
 {'name': 'Maciulis, Tony', 'title': 'Adjunct Faculty'},
 {'name': 'Maharidge, Dale ', 'title': 'Professor '},
 {'name': 'Mason, Tom', 'title': None},
 {'name': 'Matloff, Judith ', 'title': 'Adjunct faculty'},
 {'name': 'Maytal, Itai', 'title': 'Adjunct Faculty'},
 {'name': 'McCormick, David ', 'title': 'Adjunct Faculty'},
 {'name': 'McCray, Melvin', 'title': 'Adjunct Faculty'},
 {'name': 'McDonald, Erica', 'title': None},
 {'name': 'McGregor, Susan E.',
  'title': 'Assistant Professor & Assistant Director, Tow Center for Digital Journalism'},
 {'name': 'Mencher, Melvin', 'title': 'Professor Emeritus'},
 {'name': 'Merchant, Preston', 'title': None},
 {'name': 'Mintz, James', 'title': 'Adjunct Faculty'},
 {'name': 'Morais, Betsy', 'title': 'Adjunct Faculty'},
 {'name': 'Nasar, Sylvia ',
  'title': 'John S. and James L. Knight Professor of Business Journalism'},
 {'name': 'Navasky, Victor',
  'title': 'George T. Delacorte Professor in Magazine Journalism Emeritus'},
 {'name': 'Newman, Maria', 'title': 'Adjunct Faculty'},
 {'name': 'Nisenholtz, Martin', 'title': 'Adjunct Professor '},
 {'name': 'Norton, Rob', 'title': 'Adjunct Faculty'},
 {'name': 'Nosheen, Habiba', 'title': 'Adjunct Professor'},
 {'name': 'Ornstein, Charles', 'title': 'Adjunct Faculty'},
 {'name': 'Padawer , Ruth', 'title': 'Adjunct Professor'},
 {'name': 'Parker, Diantha', 'title': 'Adjunct Faculty'},
 {'name': 'Parrish, Allison ', 'title': 'Instructor, The Lede Program'},
 {'name': 'Patel, Samir S.', 'title': 'Adjunct Faculty'},
 {'name': 'Paunescu, Delia ', 'title': None},
 {'name': 'Perlman, Merrill', 'title': 'Adjunct Faculty'},
 {'name': 'Pollak, Lisa', 'title': 'Adjunct Faculty'},
 {'name': 'Pool-Eckert, Marquita', 'title': 'Adjunct Faculty'},
 {'name': 'Richardson, Lynda ', 'title': 'Adjunct Faculty'},
 {'name': 'Richardson, Whitney ', 'title': 'Adjunct Professor'},
 {'name': 'Richman, Joe', 'title': 'Adjunct Faculty'},
 {'name': 'Robbins, Ed', 'title': 'Adjunct Faculty'},
 {'name': 'Roberts, Fletcher', 'title': 'Adjunct Faculty'},
 {'name': 'Sacha, Bob ', 'title': 'Adjunct Faculty'},
 {'name': 'Sataline, Suzanne', 'title': 'Adjunct Faculty'},
 {'name': 'Schapiro, Rich', 'title': 'Adjunct Faculty'},
 {'name': 'Schatz, Robin', 'title': 'Adjunct Faculty'},
 {'name': 'Schecter, B.J.', 'title': 'Adjunct Faculty'},
 {'name': 'Schellmann, Hilke', 'title': 'Adjunct Faculty'},
 {'name': 'Schoen, John', 'title': 'Adjunct Faculty'},
 {'name': 'Schoonmaker, Mary Ellen', 'title': 'Adjunct Faculty'},
 {'name': 'Schudson, Michael ', 'title': 'Professor '},
 {'name': 'Schumacher-Matos, Ed ', 'title': 'Adjunct Faculty'},
 {'name': 'Schwartz, Jack ', 'title': None},
 {'name': 'Seave, Ava ', 'title': 'Adjunct Faculty'},
 {'name': 'Segnini, Giannina ',
  'title': 'Director of the Data Concentration Program'},
 {'name': 'Shanor, Donald', 'title': 'G. L. Cabot Professor Emeritus '},
 {'name': 'Shapiro, Bruce',
  'title': 'Executive Director, Dart Center for Journalism and Trauma and Senior Advisor for Academic Affairs'},
 {'name': 'Shapiro, Michael ', 'title': 'Professor '},
 {'name': 'Shihab-Eldin, Ahmed ', 'title': 'Adjunct Assistant Professor '},
 {'name': 'Sicha, Choire', 'title': 'Adjunct Professor'},
 {'name': 'Siegel, Lloyd', 'title': 'Adjunct Faculty'},
 {'name': 'Singer, Amy ', 'title': 'Adjunct Faculty'},
 {'name': 'Sliwa, Maria', 'title': 'Adjunct Faculty'},
 {'name': 'Solomon, Alisa',
  'title': 'Professor & Director, Arts Concentration, M.A. Program'},
 {'name': 'Soma, Jonathan', 'title': 'Director, The Lede Program'},
 {'name': 'Sotomayor, Ernest',
  'title': 'Dean of Student Affairs & Director, Latin American Initiatives'},
 {'name': 'Span, Paula ', 'title': 'Adjunct Professor'},
 {'name': 'Spanninger, Martha', 'title': 'Adjunct Faculty'},
 {'name': 'Stabiner, Karen', 'title': 'Adjunct Faculty'},
 {'name': 'Stewart, James',
  'title': 'Bloomberg Professor of Business Journalism'},
 {'name': 'Stille, Alexander',
  'title': 'San Paolo Professor of International Journalism'},
 {'name': 'Subramanian, Sushma', 'title': 'Adjunct Faculty'},
 {'name': 'Surowicz, Simon', 'title': None},
 {'name': 'Templin, Jacob', 'title': None},
 {'name': 'Tenen, Dennis', 'title': 'Instructor, The Lede Program'},
 {'name': 'Topping, Seymour  ',
  'title': 'San Paolo Professor of International Journalism Emeritus'},
 {'name': 'Trivedi, Yogi ', 'title': 'Adjunct Professor'},
 {'name': 'Tsiantar, Dody ', 'title': 'Adjunct Faculty'},
 {'name': 'Tu, Duy Linh',
  'title': 'Associate Professor of Professional Practice & Director, Digital Media Program '},
 {'name': 'Tucher, Andie ',
  'title': 'Associate Professor; Director, Ph.D. Program'},
 {'name': 'Ventura, Michael', 'title': 'Adjunct Faculty'},
 {'name': 'Wald, Jonathan', 'title': 'Adjunt Faculty'},
 {'name': 'Wald, Richard',
  'title': 'Fred W. Friendly Professor of Professional Practice in Media Society Emeritus'},
 {'name': 'Wayne, Leslie', 'title': 'Adjunct Faculty'},
 {'name': 'Weiner, Jonathan ',
  'title': 'Maxwell M. Geffen Professor of Medical and Scientific Journalism '},
 {'name': 'West, Betsy ',
  'title': 'Fred W. Friendly Professor of Professional Practice in Media and Society'},
 {'name': 'Wheatley, Jr., William', 'title': 'Adjunct Faculty'},
 {'name': 'Wiggins, Chris', 'title': 'Instructor, The Lede Program'},
 {'name': 'Wilson, Duff', 'title': 'Adjunct Faculty'},
 {'name': 'Woodward, Tali ', 'title': 'Director, M.A. Program'},
 {'name': 'Wu, Tim',
  'title': 'Director of the Saul and Janice Poliak Center for the Study of First Amendment Issues'},
 {'name': 'Yu, Frederick T C.',
  'title': 'CBS Professor Emeritus International Journalism'},
 {'name': 'Zucker, John', 'title': 'Adjunct Faculty'},
 {'name': 'Zuckerman, Jocelyn Craugh ', 'title': 'Adjunct Faculty'}]



In [59]:

    
for item in profs:
    print(item['name'])









    



Adkison, Abbey 
Alarcón, Daniel
Barclay, Dolores 
Baum, Geraldine
Bell, Emily
Benedict, Helen 
Bennet, John 
Bennett, Rob
Berman, Nina
Blair, Gwenda 
Blum, David 
Bockelman, Matt
Bodarky, George
Bogdanich, Walt 
Bourin, Lennart
Bradley, Theresa
Brainard, Curtis 
Bruder, Jessica
Burford, Melanie 
Burleigh, Nina 
Cabot, Heather
Cabral, Elena 
Canipe, Chris
Casciato, Tom
Cohen, Julie
Cohen, Lisa R.
Cohen, Sarah
Coll, Steve
Cooper, Ann
Coronel, Sheila 
Coyne , Kevin 
Cross, June 
Cunningham, Brent 
DePalma, Anthony
Deitsch, Richard
Diamond, Becky
Dinges, John
Donahue, Kerry 
Drew, Christopher 
Edsall, Thomas B. 
Einhorn, Cheryl
Elliott, Justin 
Epstein, Randi Hutter 
Evans, Farrell 
Ford, Constance Mitchell 
Freedman, Samuel 
Freeman, George 
French, Howard 
Fried, Stephen 
Garcia, Mario
Gezari, Vanessa
Gilderman, Greg
Gitlin, Todd
Giudice, Barbara 
Goldensohn, Marty
Goldman, Ari 
Goldstein, Jacob
Grueskin, Bill
Haburchak, Alan
Hajdu, David 
Hancock, LynNell
Hansen, Mark
Harris, Mark
Hartenstein, Julie
Heinzerling, Larry
Herman, Tom 
Hickey, Neil 
Hoel, Lars 
Hogan, Pamela
Holloway, Marguerite 
Hoyt, Michael 
Isabel, Lonnie
Jennings, Tom
John, Richard R. 
Jones, Matthew L. 
Kann, Peter R. 
Kantrowitz, Barbara 
Karle, Stuart
Karr, Rick
Kellogg, David
Kennedy, Lucy
Kent, Thomas 
Klatell, David
Klein, Adam
Kleman, Kim 
Knee, Jonathan
Konner, Joan
Kozar, Matt
Lehmann-Haupt, Christopher 
Lemann, Nicholas
Levenson, Jacob 
Lipsky, Seth 
Lombardi, Kristen
Luhby, Tami
Maciulis, Tony
Maharidge, Dale 
Mason, Tom
Matloff, Judith 
Maytal, Itai
McCormick, David 
McCray, Melvin
McDonald, Erica
McGregor, Susan E.
Mencher, Melvin
Merchant, Preston
Mintz, James
Morais, Betsy
Nasar, Sylvia 
Navasky, Victor
Newman, Maria
Nisenholtz, Martin
Norton, Rob
Nosheen, Habiba
Ornstein, Charles
Padawer , Ruth
Parker, Diantha
Parrish, Allison 
Patel, Samir S.
Paunescu, Delia 
Perlman, Merrill
Pollak, Lisa
Pool-Eckert, Marquita
Richardson, Lynda 
Richardson, Whitney 
Richman, Joe
Robbins, Ed
Roberts, Fletcher
Sacha, Bob 
Sataline, Suzanne
Schapiro, Rich
Schatz, Robin
Schecter, B.J.
Schellmann, Hilke
Schoen, John
Schoonmaker, Mary Ellen
Schudson, Michael 
Schumacher-Matos, Ed 
Schwartz, Jack 
Seave, Ava 
Segnini, Giannina 
Shanor, Donald
Shapiro, Bruce
Shapiro, Michael 
Shihab-Eldin, Ahmed 
Sicha, Choire
Siegel, Lloyd
Singer, Amy 
Sliwa, Maria
Solomon, Alisa
Soma, Jonathan
Sotomayor, Ernest
Span, Paula 
Spanninger, Martha
Stabiner, Karen
Stewart, James
Stille, Alexander
Subramanian, Sushma
Surowicz, Simon
Templin, Jacob
Tenen, Dennis
Topping, Seymour  
Trivedi, Yogi 
Tsiantar, Dody 
Tu, Duy Linh
Tucher, Andie 
Ventura, Michael
Wald, Jonathan
Wald, Richard
Wayne, Leslie
Weiner, Jonathan 
West, Betsy 
Wheatley, Jr., William
Wiggins, Chris
Wilson, Duff
Woodward, Tali 
Wu, Tim
Yu, Frederick T C.
Zucker, John
Zuckerman, Jocelyn Craugh

String Indexing

print all of the professors whose last name start with M



In [61]:

    
# print all of the professors whose last name start with 'M'
m_profs = []
mcount = 0
for item in profs:
    prof_name = item['name']
    if prof_name[0]=='M':
        print(item['name'])
        mcount += 1 #mcount= mcount+1
        
print(mcount)









    



Maciulis, Tony
Maharidge, Dale 
Mason, Tom
Matloff, Judith 
Maytal, Itai
McCormick, David 
McCray, Melvin
McDonald, Erica
McGregor, Susan E.
Mencher, Melvin
Merchant, Preston
Mintz, James
Morais, Betsy
13



In [65]:

    
# find all of the professors listed as "Adjunct Faculty"
adjunct_profs = []
#same as where clause
mcount=0
for item in profs:
    if item['title'] is not None and ("Adjunct" in item['title']):
        adjunct_profs.append(item)
len(adjunct_profs)









    Out[65]:





110



In [ ]:

    
for item in profs:
    if item['title'] is not None and ("Adjunct" in item['title']: adjunct_profs.append(item)



In [64]:

    
message = "bungalow"
message[0]









    Out[64]:





'b'



In [65]:

    
message[2:6]









    Out[65]:





'ngal'



In [66]:

    
message[-1]









    Out[66]:





'w'



In [67]:

    
message[0:3]









    Out[67]:





'bun'



In [68]:

    
message[:3]









    Out[68]:





'bun'



In [69]:

    
message[4:]









    Out[69]:





'alow'



In [70]:

    
message[-5:-2]









    Out[70]:





'gal'

lost count of asides



In [74]:

    
x=5



In [75]:

    
x









    Out[75]:





5



In [76]:

    
x = x-1



In [77]:

    
x









    Out[77]:





4



In [78]:

    
x -= 1



In [79]:

    
x









    Out[79]:





3



In [80]:

    
x *=2



In [81]:

    
x









    Out[81]:





6



In [ ]: