In [176]:
import math
help(math.sqrt)


Help on built-in function sqrt in module math:

sqrt(...)
    sqrt(x)
    
    Return the square root of x.


In [177]:
'c' in 'abc'


Out[177]:
True

In [178]:
s = "learn to program"

In [179]:
s[0:]


Out[179]:
'learn to program'

In [180]:
s[-3:]


Out[180]:
'ram'

In [181]:
s = s[:5] + 'ed' + s[5:]

In [182]:
wr = "Hi there dUde"

In [183]:
wr.lower()


Out[183]:
'hi there dude'

In [184]:
dir(wr)


Out[184]:
['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__getslice__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_formatter_field_name_split',
 '_formatter_parser',
 'capitalize',
 'center',
 'count',
 'decode',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'index',
 'isalnum',
 'isalpha',
 'isdigit',
 'islower',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'partition',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'strip',
 'swapcase',
 'title',
 'translate',
 'upper',
 'zfill']

In [185]:
help(str.rfind)


Help on method_descriptor:

rfind(...)
    S.rfind(sub [,start [,end]]) -> int
    
    Return the highest index in S where substring sub is found,
    such that sub is contained within S[start:end].  Optional
    arguments start and end are interpreted as in slice notation.
    
    Return -1 on failure.


In [186]:
help(str.strip)


Help on method_descriptor:

strip(...)
    S.strip([chars]) -> string or unicode
    
    Return a copy of the string S with leading and trailing
    whitespace removed.
    If chars is given and not None, remove characters in chars instead.
    If chars is unicode, S will be converted to unicode before stripping


In [187]:
wr.capitalize()


Out[187]:
'Hi there dude'

In [188]:
def letter_by_letter(wr):
    """Print every item of ``wr`` twice, each on its own line.

    Parameters
    ----------
    wr : iterable
        Usually a string; each element produced by iterating over it is
        printed two times in a row.

    Returns
    -------
    None
        The function only writes to standard output.
    """
    for char in wr:
        # Single-argument print(...) is valid on Python 2 (a print statement
        # with a parenthesised expression) and on Python 3 (a function call).
        print(char)
        print(char)

In [189]:
letter_by_letter('abc')


a
a
b
b
c
c

In [190]:
def count_vowels(string):
    """Return the number of vowels (a, e, i, o, u) in ``string``, ignoring case."""
    lowered = string.lower()
    # Each vowel contributes 1 to the total; every other character contributes nothing.
    return sum(1 for letter in lowered if letter in 'aeiou')

In [191]:
count_vowels('abA')


Out[191]:
2

In [192]:
grades = [50, 60, 75]

In [193]:
grades[0] # this is a value


Out[193]:
50

In [194]:
grades[0:1] # this is a list


Out[194]:
[50]

In [195]:
max(grades)


Out[195]:
75

In [203]:
students = ['Bob', 'Harry', 'Gilford']

In [204]:
max(students)


Out[204]:
'Harry'

In [202]:
float(sum(grades)) / float(len(grades))


Out[202]:
61.666666666666664

In [200]:
grades.append(75)

In [211]:
def wow(mylist):
    """Join the strings in ``mylist`` into one string separated by single spaces.

    Parameters
    ----------
    mylist : iterable of str
        The pieces to join, in order.

    Returns
    -------
    str
        The pieces separated by one space each; an empty string when
        ``mylist`` is empty.

    Notes
    -----
    ``str.join`` builds the result in one pass and only inserts separators
    *between* items, so no leading separator has to be stripped afterwards
    and whitespace that belongs to the first item is preserved.
    """
    return ' '.join(mylist)

In [212]:
wow(students)


Out[212]:
'Bob Harry Gilford'

In [216]:
students.sort()
wow(students)


Out[216]:
'Bob Gilford Harry'

In [31]:
class Wallet:
    """A minimal wallet that records an owner and a cash balance."""

    def __init__(self, owner):
        """Create a wallet for ``owner`` with a starting balance of 0."""
        self.cash = 0
        self.owner = owner

    def addCash(self, amount):
        """Add ``amount`` to the current balance."""
        self.cash = self.cash + amount

    def printWallet(self):
        """Print one line naming the owner and the current balance."""
        # Single-argument print(...) runs unchanged on Python 2 and Python 3.
        print("Wallet belongs to {}, and has {}$".format(self.owner, str(self.cash)))

In [32]:
w = Wallet("Bob")
w.printWallet()


Wallet belongs to Bob, and has 0$

In [33]:
w.addCash(10)
w.printWallet()


Wallet belongs to Bob, and has 10$

In [34]:
help(range)


Help on built-in function range in module __builtin__:

range(...)
    range(stop) -> list of integers
    range(start, stop[, step]) -> list of integers
    
    Return a list containing an arithmetic progression of integers.
    range(i, j) returns [i, i+1, i+2, ..., j-1]; start (!) defaults to 0.
    When step is given, it specifies the increment (or decrement).
    For example, range(4) returns [0, 1, 2, 3].  The end point is omitted!
    These are exactly the valid indices for a list of 4 elements.


In [36]:
# Start at 1, stop before 10, step by 3.
range(1, 10, 3)


Out[36]:
[1, 4, 7]

In [27]:
help(repr)


Help on built-in function repr in module __builtin__:

repr(...)
    repr(object) -> string
    
    Return the canonical string representation of the object.
    For most object types, eval(repr(object)) == object.


In [28]:
repr(w)


Out[28]:
'<__main__.Wallet instance at 0x900568c>'

In [29]:
repr(3)


Out[29]:
'3'

In [30]:
repr('3')


Out[30]:
"'3'"

In [39]:
import json

In [40]:
json.dumps(['a'])


Out[40]:
'["a"]'

In [1]:
print "I'm the worst developer {}".format("ever")


I'm the worst developer ever

In [1]:
import urllib

In [2]:
f = urllib.urlopen("http://www.gutenberg.org/ebooks/3420.txt.utf-8")

In [3]:
# Read the whole response and decode it as UTF-8 text.
try:
    book_text = f.read().decode('utf-8')
finally:
    # Close the URL handle even if reading or decoding fails.
    f.close()

In [4]:
book_text = book_text.replace("\r","")

In [12]:
print book_text[14000:15000]


 that can be freely distributed
in machine readable form.

The Project gratefully accepts contributions of money, time,
public domain materials, or royalty free copyright licenses.
Money should be paid to the:
"Project Gutenberg Literary Archive Foundation."

If you are interested in contributing scanning equipment or
software or other items, please contact Michael Hart at:
hart@pobox.com

*END THE SMALL PRINT! FOR PUBLIC DOMAIN ETEXTS*Ver.12.12.00*END*





This etext was produced by
Amy E Zelmer  <a.zelmer@cqu.edu.au>
Col Choat  <CChoat@sanderson.net.au>
Sue Asscher  <asschers@dingoblue.net.au>





A VINDICATION OF THE RIGHTS OF WOMAN,
WITH STRICTURES ON POLITICAL AND MORAL SUBJECTS,
BY MARY WOLLSTONECRAFT.

WITH A BIOGRAPHICAL SKETCH OF THE AUTHOR.




CONTENTS.


INTRODUCTION.

CHAPTER 1.  THE RIGHTS AND INVOLVED DUTIES OF MANKIND CONSIDERED.

CHAPTER 2.  THE PREVAILING OPINION OF A SEXUAL CHARACTER DISCUSSED.

CHAPTER 3.  THE SAME SUBJECT CONTINUED.

CHAPTER 4.  OBSERVATIONS ON T

In [8]:
import re

In [10]:
# Regular-expression flags are per-call options: pass them through the
# ``flags=`` argument (e.g. ``re.sub(pattern, repl, text, flags=re.UNICODE)``)
# or inline in the pattern (e.g. ``"(?u)..."``). They cannot be switched on
# globally. Assigning ``True`` to ``re.UNICODE`` / ``re.DOTALL`` only
# overwrote the module's flag constants with the value 1, so those
# assignments have been removed and the constants keep their real values.

In [13]:
# Keep the text that follows the end-of-licence marker. The trailing ".*"
# consumes only the rest of the marker's own line, because "." does not match
# a newline when DOTALL is not passed. The pattern is a raw string so that
# "\*" reaches the regex engine without relying on an invalid string escape.
book_text = re.split(r"\*END THE SMALL PRINT! FOR PUBLIC DOMAIN ETEXTS\*.*", book_text)[1]

In [14]:
len(book_text)


Out[14]:
509270

In [23]:
print book_text[:500]
print "[...]"
print book_text[-150:]


This etext was produced by
Amy E Zelmer  <a.zelmer@cqu.edu.au>
Col Choat  <CChoat@sanderson.net.au>
Sue Asscher  <asschers@dingoblue.net.au>





A VINDICATION OF THE RIGHTS OF WOMAN,
WITH STRICTURES ON POLITICAL AND MORAL SUBJECTS,
BY MARY WOLLSTONECRAFT.

WITH A BIOGRAPHICAL SKETCH OF THE AUTHOR.




CONTENTS.


INTRODUCTION.

CHAPTER 1.  THE RIGHTS AND INVOLVED DUTIES OF MANKIND CONSIDERED.

CHAPTER 2.  THE PREVAILING OPINION OF A SEXUAL CHARACTER DISCUSSED.

CHAPTER 3.  THE SAME SUBJECT CONT
[...]
sk-masters, expecting virtue where nature has
not given understanding!





End of The Project Gutenberg Etext of A Vindication of the Rights of Woman

In [18]:
book_text = book_text.strip()

In [22]:
help(re.sub)


Help on function sub in module re:

sub(pattern, repl, string, count=0, flags=0)
    Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.  repl can be either a string or a callable;
    if a string, backslash escapes in it are processed.  If it is
    a callable, it's passed the match object and must return
    a replacement string to be used.


In [24]:
# Drop the "This etext was produced by" credit: that line plus every non-empty
# line directly after it. The final "\n*?" is lazy and ends the pattern, so it
# matches no characters; the blank lines around the credit are left in place.
book_text = re.sub("This etext was produced by(?:\n.+)*\n*?", "", book_text)

In [26]:
# Delete the closing line of the text: a newline and everything after it up to
# the end. "." does not match a newline here, so "\n.*$" can only succeed at
# the end of the text.
book_text = re.sub("\n.*$", "", book_text)

In [27]:
book_text = book_text.strip()

In [29]:
print book_text[:100]
print "[...]"
print book_text[-100:]


A VINDICATION OF THE RIGHTS OF WOMAN,
WITH STRICTURES ON POLITICAL AND MORAL SUBJECTS,
BY MARY WOLLS
[...]
will be
worse than Egyptian task-masters, expecting virtue where nature has
not given understanding!

In [31]:
# Normalise case, then delete every character that is neither a word
# character nor whitespace (i.e. punctuation and symbols).
book_text = book_text.lower()
# The inline "(?u)" flag makes \w and \s Unicode-aware on Python 2, so
# accented letters in the decoded text are kept instead of being deleted.
# On Python 3, str patterns are already Unicode-aware and the flag is a no-op.
book_text = re.sub(r"(?u)[^\w\s]", "", book_text)

In [32]:
# Split the text into tokens on runs of whitespace. The pattern is a raw
# string (so "\s" is not an invalid string escape) and carries "(?u)" so that
# whitespace matching is Unicode-aware on Python 2 as well.
words = re.split(r"(?u)\s+", book_text)

In [33]:
len(words)


Out[33]:
86446

In [34]:
# Download the stop-word list and split it into one entry per line.
f = urllib.urlopen("http://louis.philotech.org/confs/MtlPythonEn2014/corpus/stoplist.en.utf-8.txt")
try:
    stoplist = f.read().decode('utf-8').split("\n")
finally:
    # Close the URL handle even if reading or decoding fails.
    f.close()

In [35]:
len(stoplist)


Out[35]:
562

In [36]:
stoplist[:50]


Out[36]:
[u'a',
 u'aboard',
 u'about',
 u'above',
 u'across',
 u'after',
 u'again',
 u'against',
 u'all',
 u'almost',
 u'alone',
 u'along',
 u'alongside',
 u'already',
 u'also',
 u'although',
 u'always',
 u'am',
 u'amid',
 u'amidst',
 u'among',
 u'amongst',
 u'an',
 u'and',
 u'another',
 u'anti',
 u'any',
 u'anybody',
 u'anyone',
 u'anything',
 u'anywhere',
 u'are',
 u'area',
 u'areas',
 u"aren't",
 u'around',
 u'as',
 u'ask',
 u'asked',
 u'asking',
 u'asks',
 u'astride',
 u'at',
 u'aught',
 u'away',
 u'back',
 u'backed',
 u'backing',
 u'backs',
 u'bar']

In [37]:
# Keep alphabetic tokens longer than one character, then drop stop words.
# The loop variable is ``word`` rather than ``w``: on Python 2 a list
# comprehension's variable leaks into the enclosing scope, which would
# overwrite the Wallet instance bound to ``w`` earlier in the notebook.
stop_words = set(stoplist)  # set lookup avoids scanning the whole list for every word
words = [word for word in words if len(word) > 1 and word.isalpha()]
words = [word for word in words if word not in stop_words]
len(words)


Out[37]:
34244

In [38]:
help(xrange)


Help on class xrange in module __builtin__:

class xrange(object)
 |  xrange(stop) -> xrange object
 |  xrange(start, stop[, step]) -> xrange object
 |  
 |  Like range(), but instead of returning a list, returns an object that
 |  generates the numbers in the range on demand.  For looping, this is 
 |  slightly faster than range() and more memory efficient.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(...)
 |      x.__getattribute__('name') <==> x.name
 |  
 |  __getitem__(...)
 |      x.__getitem__(y) <==> x[y]
 |  
 |  __iter__(...)
 |      x.__iter__() <==> iter(x)
 |  
 |  __len__(...)
 |      x.__len__() <==> len(x)
 |  
 |  __reduce__(...)
 |  
 |  __repr__(...)
 |      x.__repr__() <==> repr(x)
 |  
 |  __reversed__(...)
 |      Returns a reverse iterator.
 |  
 |  ----------------------------------------------------------------------
 |  Data and other attributes defined here:
 |  
 |  __new__ = <built-in method __new__ of type object>
 |      T.__new__(S, ...) -> a new object with type S, a subtype of T


In [39]:
# Segment size: the number of words placed in each chunk.
t = 50
# Cut ``words`` into consecutive slices of ``t`` items, starting at indices
# 0, t, 2t, ...; the final slice is shorter when len(words) is not a multiple of t.
words_seg = [ words[i:i+t] for i in xrange(0, len(words), t) ]

In [40]:
len(words_seg)


Out[40]:
685

In [41]:
print words_seg[0]


[u'vindication', u'rights', u'woman', u'strictures', u'political', u'moral', u'subjects', u'mary', u'wollstonecraft', u'biographical', u'sketch', u'author', u'contents', u'introduction', u'chapter', u'rights', u'involved', u'duties', u'mankind', u'considered', u'chapter', u'prevailing', u'opinion', u'sexual', u'character', u'discussed', u'chapter', u'subject', u'continued', u'chapter', u'observations', u'degradation', u'woman', u'reduced', u'causes', u'chapter', u'animadversions', u'writers', u'rendered', u'women', u'objects', u'pity', u'bordering', u'contempt', u'chapter', u'effect', u'association', u'ideas', u'character', u'chapter']
Stopped at section 2.3 of the tutorial notebook: http://nbviewer.ipython.org/url/louis.philotech.org/confs/MtlPythonEn2014/MtlPythonEn2014.ipynb

In [ ]: