In [4]:
import requests
proxies = {
  "http": "69.30.209.16:8000"
}
res = requests.get("http://www.python.org", proxies=proxies) 
print res.text


<!doctype html>
<!--[if lt IE 7]>   <html class="no-js ie6 lt-ie7 lt-ie8 lt-ie9">   <![endif]-->
<!--[if IE 7]>      <html class="no-js ie7 lt-ie8 lt-ie9">          <![endif]-->
<!--[if IE 8]>      <html class="no-js ie8 lt-ie9">                 <![endif]-->
<!--[if gt IE 8]><!--><html class="no-js" lang="en" dir="ltr">  <!--<![endif]-->

<head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">

    <link rel="prefetch" href="//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js">

    <meta name="application-name" content="Python.org">
    <meta name="msapplication-tooltip" content="The official home of the Python Programming Language">
    <meta name="apple-mobile-web-app-title" content="Python.org">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black">

    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="HandheldFriendly" content="True">
    <meta name="format-detection" content="telephone=no">
    <meta http-equiv="cleartype" content="on">
    <meta http-equiv="imagetoolbar" content="false">

    <script src="/static/js/libs/modernizr.js"></script>

    <link href="/static/stylesheets/style.css" rel="stylesheet" type="text/css" title="default" />
    <link href="/static/stylesheets/mq.css" rel="stylesheet" type="text/css" media="not print, braille, embossed, speech, tty" />
    

    <!--[if (lte IE 8)&(!IEMobile)]>
    <link href="/static/stylesheets/no-mq.css" rel="stylesheet" type="text/css" media="screen" />
    
    
    <![endif]-->

    
    <link rel="icon" type="image/x-icon" href="/static/favicon.ico">
    <link rel="apple-touch-icon-precomposed" sizes="144x144" href="/static/apple-touch-icon-144x144-precomposed.png">
    <link rel="apple-touch-icon-precomposed" sizes="114x114" href="/static/apple-touch-icon-114x114-precomposed.png">
    <link rel="apple-touch-icon-precomposed" sizes="72x72" href="/static/apple-touch-icon-72x72-precomposed.png">
    <link rel="apple-touch-icon-precomposed" href="/static/apple-touch-icon-precomposed.png">
    <link rel="apple-touch-icon" href="/static/apple-touch-icon-precomposed.png">

    
    <meta name="msapplication-TileImage" content="/static/metro-icon-144x144-precomposed.png"><!-- white shape -->
    <meta name="msapplication-TileColor" content="#3673a5"><!-- python blue -->
    <meta name="msapplication-navbutton-color" content="#3673a5">

    <meta property="og:site_name" content="Python.org">
    <meta property="og:type" content="website">

    <title>Welcome to Python.org</title>
    <meta property="og:title" content="Welcome to Python.org">

    
    <meta name="description" content="The official home of the Python Programming Language">
    <meta name="og:description" content="The official home of the Python Programming Language">
    <meta name="keywords" content="Python programming language object oriented web free open source software license documentation download community">
    <meta property="og:tag" content="Python programming language object oriented web free open source software license documentation download community">
    <meta property="og:published_time" content="">
    <meta property="og:modified_time" content="">
    <meta property="og:author" content="">
    <meta property="og:section" content=""> 
    <meta property="og:url" content="">
    <meta property="og:image" content="">
    <meta property="og:video" content="">

    <link rel="author" href="/static/humans.txt">

    

    
    <script type="application/ld+json">
     {
       "@context": "http://schema.org",
       "@type": "WebSite",
       "url": "https://www.python.org/",
       "potentialAction": {
         "@type": "SearchAction",
         "target": "https://www.python.org/search/?q={search_term_string}",
         "query-input": "required name=search_term_string"
       }
     }
    </script>

    
    <script type="text/javascript">
    var _gaq = _gaq || [];
    _gaq.push(['_setAccount', 'UA-39055973-1']);
    _gaq.push(['_trackPageview']);

    (function() {
        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
    })();
    </script>
    
</head>

<body class="python home" id="homepage">

    <div id="touchnav-wrapper">

        <div id="nojs" class="do-not-print">
            <p><strong>Notice:</strong> While Javascript is not essential for this website, your interaction with the content will be limited. Please turn Javascript on for the full experience. </p>
        </div>

        <!--[if lt IE 8]>
        <div id="oldie-warning" class="do-not-print">
            <p><strong>Notice:</strong> Your browser is <em>ancient</em> and <a href="http://www.ie6countdown.com/">Microsoft agrees</a>. <a href="http://browsehappy.com/">Upgrade to a different browser</a> or <a href="http://www.google.com/chromeframe/?redirect=true">install Google Chrome Frame</a> to experience a better web.</p>
        </div>
        <![endif]-->

        <!-- Sister Site Links -->
        <div id="top" class="top-bar do-not-print">

            <nav class="meta-navigation container" role="navigation">

                
                <div class="skip-link screen-reader-text">
                    <a href="#content" title="Skip to content">Skip to content</a>
                </div>

                
                <a id="close-python-network" class="jump-link" href="#python-network" aria-hidden="true">
                    <span aria-hidden="true" class="icon-arrow-down"><span>&#9660;</span></span> Close
                </a>

                

<ul class="menu" role="tree">
    
    <li class="python-meta current_item selectedcurrent_branch selected">
        <a href="/" title="The Python Programming Language" class="current_item selectedcurrent_branch selected">Python</a>
    </li>
    
    <li class="psf-meta ">
        <a href="/psf-landing/" title="The Python Software Foundation" >PSF</a>
    </li>
    
    <li class="docs-meta ">
        <a href="https://docs.python.org" title="Python Documentation" >Docs</a>
    </li>
    
    <li class="pypi-meta ">
        <a href="https://pypi.python.org/" title="Python Package Index" >PyPI</a>
    </li>
    
    <li class="jobs-meta ">
        <a href="/jobs/" title="Python Job Board" >Jobs</a>
    </li>
    
    <li class="shop-meta ">
        <a href="/community/" title="Python Community" >Community</a>
    </li>
    
</ul>


                <a id="python-network" class="jump-link" href="#top" aria-hidden="true">
                    <span aria-hidden="true" class="icon-arrow-up"><span>&#9650;</span></span> The Python Network
                </a>

            </nav>

        </div>

        <!-- Header elements -->
        <header class="main-header" role="banner">
            <div class="container">

                <h1 class="site-headline">
                    <a href="/"><img class="python-logo" src="/static/img/python-logo.png" alt="python&trade;"></a>
                </h1>

                <div class="options-bar do-not-print">

                    
                    <a id="site-map-link" class="jump-to-menu" href="#site-map"><span class="menu-icon">&equiv;</span> Menu</a><form class="search-the-site" action="/search/" method="get">
                        <fieldset title="Search Python.org">

                            <span aria-hidden="true" class="icon-search"></span>

                            <label class="screen-reader-text" for="id-search-field">Search This Site</label>
                            <input id="id-search-field" name="q" type="search" role="textbox" class="search-field" placeholder="Search" value="" tabindex="1">

                            <button type="submit" name="submit" id="submit" class="search-button" title="Submit this Search" tabindex="3">
                                GO
                            </button>

                            
                            <!--[if IE]><input type="text" style="display: none;" disabled="disabled" size="1" tabindex="4"><![endif]-->

                        </fieldset>
                    </form><span class="breaker"></span><div class="adjust-font-size" aria-hidden="true">
                        <ul class="navigation menu" aria-label="Adjust Text Size on Page">
                            <li class="tier-1 last" aria-haspopup="true">
                                <a href="#" class="action-trigger"><strong><small>A</small> A</strong></a>
                                <ul class="subnav menu">
                                    <li class="tier-2 element-1" role="treeitem"><a class="text-shrink" title="Make Text Smaller" href="javascript:;">Smaller</a></li>
                                    <li class="tier-2 element-2" role="treeitem"><a class="text-grow" title="Make Text Larger" href="javascript:;">Larger</a></li>
                                    <li class="tier-2 element-3" role="treeitem"><a class="text-reset" title="Reset any font size changes I have made" href="javascript:;">Reset</a></li>
                                </ul>
                            </li>
                        </ul>
                    </div><div class="winkwink-nudgenudge">
                        <ul class="navigation menu" aria-label="Social Media Navigation">
                            <li class="tier-1 last" aria-haspopup="true">
                                <a href="#" class="action-trigger">Socialize</a>
                                <ul class="subnav menu">
                                    <li class="tier-2 element-1" role="treeitem"><a href="http://plus.google.com/+Python"><span aria-hidden="true" class="icon-google-plus"></span>Google+</a></li>
                                    <li class="tier-2 element-2" role="treeitem"><a href="http://www.facebook.com/pythonlang?fref=ts"><span aria-hidden="true" class="icon-facebook"></span>Facebook</a></li>
                                    <li class="tier-2 element-3" role="treeitem"><a href="http://twitter.com/ThePSF"><span aria-hidden="true" class="icon-twitter"></span>Twitter</a></li>
                                    <li class="tier-2 element-4" role="treeitem"><a href="/community/irc/"><span aria-hidden="true" class="icon-freenode"></span>Chat on IRC</a></li>
                                </ul>
                            </li>
                        </ul>
                    </div><div class="account-signin">
                        <ul class="navigation menu" aria-label="Social Media Navigation">
                            <li class="tier-1 last" aria-haspopup="true">
                                
                                <a href="/accounts/login/" title="Sign Up or Sign In to Python.org">Sign In</a>
                                <ul class="subnav menu">
                                    <li class="tier-2 element-1" role="treeitem"><a href="/accounts/signup/">Sign Up / Register</a></li>
                                    <li class="tier-2 element-2" role="treeitem"><a href="/accounts/login/">Sign In</a></li>
                                </ul>
                                
                            </li>
                        </ul>
                    </div>

                </div><!-- end options-bar -->

                <nav id="mainnav" class="python-navigation main-navigation do-not-print" role="navigation">
                    
                        
<ul class="navigation menu" role="menubar" aria-label="Main Navigation">
  
    
    
    <li id="about" class="tier-1 element-1  " aria-haspopup="true">
        <a href="/about/" title="" class="">About</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/about/apps/" title="">Applications</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/about/quotes/" title="">Quotes</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/about/gettingstarted/" title="">Getting Started</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/about/help/" title="">Help</a></li>
    
</ul>

        
    </li>
    
    
    
    <li id="downloads" class="tier-1 element-2  " aria-haspopup="true">
        <a href="/downloads/" title="" class="">Downloads</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/downloads/" title="">All releases</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/downloads/source/" title="">Source code</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/downloads/windows/" title="">Windows</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/downloads/mac-osx/" title="">Mac OS X</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="/download/other/" title="">Other Platforms</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="https://docs.python.org/3/license.html" title="">License</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="/download/alternatives" title="">Alternative Implementations</a></li>
    
</ul>

        
    </li>
    
    
    
    <li id="documentation" class="tier-1 element-3  " aria-haspopup="true">
        <a href="/doc/" title="" class="">Documentation</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/doc/" title="">Docs</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/doc/av" title="">Audio/Visual Talks</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="https://wiki.python.org/moin/BeginnersGuide" title="">Beginner&#39;s Guide</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="https://docs.python.org/devguide/" title="">Developer&#39;s Guide</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="https://docs.python.org/faq/" title="">FAQ</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="http://wiki.python.org/moin/Languages" title="">Non-English Docs</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="http://python.org/dev/peps/" title="">PEP Index</a></li>
    
        <li class="tier-2 element-8" role="treeitem"><a href="https://wiki.python.org/moin/PythonBooks" title="">Python Books</a></li>
    
</ul>

        
    </li>
    
    
    
    <li id="community" class="tier-1 element-4  " aria-haspopup="true">
        <a href="/community/" title="" class="">Community</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/community/diversity/" title="">Diversity</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/community/irc/" title="">IRC</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/community/lists/" title="">Mailing Lists</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/community/workshops/" title="">Python Conferences</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="/community/sigs/" title="">Special Interest Groups</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="https://wiki.python.org/moin/" title="">Python Wiki</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="/community/logos/" title="">Python Logo</a></li>
    
        <li class="tier-2 element-8" role="treeitem"><a href="/community/merchandise/" title="">Merchandise</a></li>
    
        <li class="tier-2 element-9" role="treeitem"><a href="/community/awards" title="">Community Awards</a></li>
    
</ul>

        
    </li>
    
    
    
    <li id="success-stories" class="tier-1 element-5  " aria-haspopup="true">
        <a href="/about/success/" title="success-stories" class="">Success Stories</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/about/success/#arts" title="">Arts</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/about/success/#business" title="">Business</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/about/success/#education" title="">Education</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/about/success/#engineering" title="">Engineering</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="/about/success/#government" title="">Government</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="/about/success/#scientific" title="">Scientific</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="/about/success/#software-development" title="">Software Development</a></li>
    
</ul>

        
    </li>
    
    
    
    <li id="news" class="tier-1 element-6  " aria-haspopup="true">
        <a href="/blogs/" title="News from around the Python world" class="">News</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/blogs/" title="Python Insider Blog Posts">Python News</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="http://planetpython.org/" title="Planet Python">Community News</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="http://pyfound.blogspot.com/" title="PSF Blog">PSF News</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="http://pycon.blogspot.com/" title="PyCon Blog">PyCon News</a></li>
    
</ul>

        
    </li>
    
    
    
    <li id="events" class="tier-1 element-7  " aria-haspopup="true">
        <a href="/events/" title="" class="">Events</a>
        
            

<ul class="subnav menu" role="menu" aria-hidden="true">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/events/python-events/" title="">Python Events</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/events/python-user-group/" title="">User Group Events</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/events/python-events/past/" title="">Python Events Archive</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/events/python-user-group/past/" title="">User Group Events Archive</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="https://wiki.python.org/moin/PythonEventsCalendar#Submitting_an_Event" title="">Submit an Event</a></li>
    
</ul>

        
    </li>
    
    
    
    
  
</ul>

                    
                </nav>

                <div class="header-banner "> <!-- for optional "do-not-print" class -->
                    
        <div id="dive-into-python" class="flex-slideshow slideshow">

            <ul class="launch-shell menu" id="launch-shell">
                <li>
                    <a class="button prompt" id="start-shell" data-shell-container="#dive-into-python" href="/shell/">&gt;_
                        <span class="message">Launch Interactive Shell</span>
                    </a>
                </li>
            </ul>

            <ul class="slides menu">
                
                <li>
                    <div class="slide-code"><pre><code><span class="comment"># Python 3: Fibonacci series up to n</span>
>>> def fib(n):
>>>     a, b = 0, 1
>>>     while a &lt; n:
>>>         print(a, end=' ')
>>>         a, b = b, a+b
>>>     print()
>>> fib(1000)
<span class="output">0 1 1 2 3 5 8 13 21 34 55 89 144 233 377 610 987</span></code></pre></div>
                    <div class="slide-copy"><h1>Functions Defined</h1>
<p>The core of extensible programming is defining functions. Python allows mandatory and optional arguments, keyword arguments, and even arbitrary argument lists. <a href="//docs.python.org/3/tutorial/controlflow.html#defining-functions">More about defining functions in Python&nbsp;3</a></p></div>
                </li>
                
                <li>
                    <div class="slide-code"><pre><code><span class="comment"># Python 3: List comprehensions</span>
>>> fruits = ['Banana', 'Apple', 'Lime']
>>> loud_fruits = [fruit.upper() for fruit in fruits]
>>> print(loud_fruits)
<span class="output">['BANANA', 'APPLE', 'LIME']</span>

<span class="comment"># List and the enumerate function</span>
>>> list(enumerate(fruits))
<span class="output">[(0, 'Banana'), (1, 'Apple'), (2, 'Lime')]</span></code></pre></div>
                    <div class="slide-copy"><h1>Compound Data Types</h1>
<p>Lists (known as arrays in other languages) are one of the compound data types that Python understands. Lists can be indexed, sliced and manipulated with other built-in functions. <a href="//docs.python.org/3/tutorial/introduction.html#lists">More about lists in Python&nbsp;3</a></p></div>
                </li>
                
                <li>
                    <div class="slide-code"><pre><code><span class="comment"># Python 3: Simple arithmetic</span>
>>> 1 / 2
<span class="output">0.5</span>
>>> 2 ** 3
<span class="output">8</span>
>>> 17 / 3  <span class="comment"># classic division returns a float</span>
<span class="output">5.666666666666667</span>
>>> 17 // 3  <span class="comment"># floor division</span>
<span class="output">5</span></code></pre></div>
                    <div class="slide-copy"><h1>Intuitive Interpretation</h1>
<p>Calculations are simple with Python, and expression syntax is straightforward: the operators <code>+</code>, <code>-</code>, <code>*</code> and <code>/</code> work as expected; parentheses <code>()</code> can be used for grouping. <a href="http://docs.python.org/3/tutorial/introduction.html#using-python-as-a-calculator">More about simple math functions in Python&nbsp;3</a>.</p></div>
                </li>
                
                <li>
                    <div class="slide-code"><pre><code><span class="comment"># Python 3: Simple output (with Unicode)</span>
>>> print("Hello, I'm Python!")
<span class="output">Hello, I'm Python!</span>

<span class="comment"># Input, assignment</span>
>>> name = input('What is your name?\n')
>>> print('Hi, %s.' % name)
<span class="output">What is your name?
Python
Hi, Python.</span></code></pre></div>
                    <div class="slide-copy"><h1>Quick &amp; Easy to Learn</h1>
<p>Experienced programmers in any other language can pick up Python very quickly, and beginners find the clean syntax and indentation structure easy to learn. <a href="//docs.python.org/3/tutorial/">Whet your appetite</a> with our Python&nbsp;3 overview.</p>
                   </div>
                </li>
                
                <li>
                    <div class="slide-code"><pre><code><span class=\"comment\"># For loop on a list</span>
>>> numbers = [2, 4, 6, 8]
>>> product = 1
>>> for number in numbers:
...    product = product * number
... 
>>> print('The product is:', product)
<span class=\"output\">The product is: 384</span></code></pre></div>
                    <div class="slide-copy"><h1>All the Flow You&rsquo;d Expect</h1>
<p>Python knows the usual control flow statements that other languages speak &mdash; <code>if</code>, <code>for</code>, <code>while</code> and <code>range</code> &mdash; with some of its own twists, of course. <a href="//docs.python.org/3/tutorial/controlflow.html">More control flow tools in Python&nbsp;3</a></p></div>
                </li>
                
            </ul>
        </div>


                </div>

                
        <div class="introduction">
            <p>Python is a programming language that lets you work quickly <span class="breaker"></span>and integrate systems more effectively. <a class="readmore" href="/doc/">Learn More</a></p>
        </div>


             </div><!-- end .container -->
        </header>

        <div id="content" class="content-wrapper">
            <!-- Main Content Column -->
            <div class="container">

                <section class="main-content " role="main">

                    
                    

                    

                <div class="row">

                    <div class="small-widget get-started-widget">
                        <h2 class="widget-title"><span aria-hidden="true" class="icon-get-started"></span>Get Started</h2>
<p>Whether you're new to programming or an experienced developer, it's easy to learn and use Python.</p>
<p><a href="/about/gettingstarted/">Start with our Beginner&rsquo;s Guide</a></p>
                    </div>

                    <div class="small-widget download-widget">
                        <h2 class="widget-title"><span aria-hidden="true" class="icon-download"></span>Download</h2>
<p>Python source code and installers are available for download for all versions! Not sure which version to use? <a href="https://wiki.python.org/moin/Python2orPython3">Check here</a>.</p>
<p>Latest: <a href="/downloads/release/python-350/">Python 3.5.0</a> - <a href="/downloads/release/python-2710/">Python 2.7.10</a></p>
                    </div>

                    <div class="small-widget documentation-widget">
                        <h2 class="widget-title"><span aria-hidden="true" class="icon-documentation"></span>Docs</h2>
<p>Documentation for Python's standard library, along with tutorials and guides, are available online.</p>
<p><a href="https://docs.python.org">docs.python.org</a></p>
                    </div>

                    <div class="small-widget jobs-widget last">
                        <h2 class="widget-title"><span aria-hidden="true" class="icon-jobs"></span>Jobs</h2>
<p>Looking for work or have a Python related position that you're trying to hire for? Our <strong>relaunched community-run job board</strong> is the place to go.</p>
<p><a href="//jobs.python.org">jobs.python.org</a></p>
                    </div>

                </div>

                <div class="list-widgets row">

                    <div class="medium-widget blog-widget">
                        
                        <div class="shrubbery">
                        
                            <h2 class="widget-title"><span aria-hidden="true" class="icon-news"></span>Latest News</h2>
                            <p class="give-me-more"><a href="http://blog.python.org" title="More News">More</a></p>
                            
                            <ul class="menu">
                                
                                
                                <li>
<time datetime="2015-09-13T14:28:00.000006+00:00"><span class="say-no-more">2015-</span>09-13</time>
 <a href="http://feedproxy.google.com/~r/PythonInsider/~3/a6zwstMbRrg/python-350-has-been-released.html">Python 3.5.0 is now&nbsp;available for download. Python 3.5.0 is the ...</a></li>
                                
                                <li>
<time datetime="2015-09-09T13:43:00.000002+00:00"><span class="say-no-more">2015-</span>09-09</time>
 <a href="http://feedproxy.google.com/~r/PythonInsider/~3/D-XkdrMEtE0/python-350-release-candidate-4-released.html">Python 3.5.0rc4 is now&nbsp;available for download. This is a last-minute ...</a></li>
                                
                                <li>
<time datetime="2015-09-08T01:37:00.000001+00:00"><span class="say-no-more">2015-</span>09-08</time>
 <a href="http://feedproxy.google.com/~r/PythonInsider/~3/wxPYPcIYdqo/python-350-release-candidate-3-released.html">Python 3.5.0rc3 is now&nbsp;available for download.This is a preview release, ...</a></li>
                                
                                <li>
<time datetime="2015-08-11T00:34:40.000001+00:00"><span class="say-no-more">2015-</span>08-11</time>
 <a href="http://feedproxy.google.com/~r/PythonInsider/~3/a1yIx-RxQW4/python-3.html">Python 3.5.0rc1 is now&nbsp;available for download. This is a preview ...</a></li>
                                
                                <li>
<time datetime="2015-07-05T17:22:00.000006+00:00"><span class="say-no-more">2015-</span>07-05</time>
 <a href="http://feedproxy.google.com/~r/PythonInsider/~3/4Fo1tWPI_P0/python-350b3-released.html">Python 3.5.0b3 is now&nbsp;available for download. This is a preview ...</a></li>
                                
                            </ul>
                        </div><!-- end .shrubbery -->

                    </div>

                    <div class="medium-widget event-widget last">
                        
                        <div class="shrubbery">
                        
                            <h2 class="widget-title"><span aria-hidden="true" class="icon-calendar"></span>Upcoming Events</h2>
                            <p class="give-me-more"><a href="/events/calendars/" title="More Events">More</a></p>
                            
                            <ul class="menu">
                                
                                
                                
                                <li>
<time datetime="2015-09-19T00:00:00+00:00"><span class="say-no-more">2015-</span>09-19</time>
 <a href="/events/python-events/348/">PyConChina 2015 - Beijing</a></li>
                                
                                
                                
                                <li>
<time datetime="2015-09-20T00:00:00+00:00"><span class="say-no-more">2015-</span>09-20</time>
 <a href="/events/python-events/347/">PyConChina 2015 - Hangzhou</a></li>
                                
                                
                                
                                <li>
<time datetime="2015-09-24T16:00:00+00:00"><span class="say-no-more">2015-</span>09-24</time>
 <a href="/events/python-user-group/345/">PyData Warsaw #1</a></li>
                                
                                
                                
                                <li>
<time datetime="2015-09-25T00:00:00+00:00"><span class="say-no-more">2015-</span>09-25</time>
 <a href="/events/python-events/322/">PyTexas 2015</a></li>
                                
                                
                                
                                <li>
<time datetime="2015-09-26T00:00:00+00:00"><span class="say-no-more">2015-</span>09-26</time>
 <a href="/events/python-user-group/351/">PyDDF Python Sprint 2015</a></li>
                                
                                
                            </ul>
                        </div>

                    </div>

                </div>

                <div class="row">

                    <div class="medium-widget success-stories-widget">
                        

                        <div class="shrubbery">
                            

                            <h2 class="widget-title"><span aria-hidden="true" class="icon-success-stories"></span>Success Stories</h2>
                            <p class="give-me-more"><a href="/success-stories/" title="More Success Stories">More</a></p>

                            
                            <div class="success-story-item" data-weight="0" id="success-story-2" style="display: none;">

                            <blockquote>
                                <a href="/success-stories/industrial-light-magic-runs-python/">ILM runs a batch processing environment capable of modeling, rendering and compositing tens of thousands of motion picture frames per day. Thousands of machines running Linux, IRIX, Compaq Tru64, OS X, Solaris, and Windows join together to provide a production pipeline used by ~800 users daily. Speed of development is key, and Python was a faster way to code (and re-code) the programs that control this production pipeline.</a>
                            </blockquote>

                            <table cellpadding="0" cellspacing="0" border="0" width="100%" class="quote-from">
                                <tbody>
                                    <tr>
                                        
                                        <td><p><a href="/success-stories/industrial-light-magic-runs-python/">Industrial Light &amp; Magic Runs on Python</a> <em>by Tim Fortenberry</em></p></td>
                                    </tr>
                                </tbody>
                            </table>
                            </div>
                            

                        </div><!-- end .shrubbery -->

                    </div>

                    <div class="medium-widget applications-widget last">
                        <div class="shrubbery">
                            <h2 class="widget-title"><span aria-hidden="true" class="icon-python"></span>Use Python for&hellip;</h2>
<p class="give-me-more"><a href="/about/apps" title="More Applications">More</a></p>

<ul class="menu">
    <li><b>Web Programming</b>:
        <span class="tag-wrapper"><a class="tag" href="http://www.djangoproject.com/">Django</a>, <a class="tag" href="http://www.pylonsproject.org/">Pyramid</a>, <a class="tag" href="http://bottlepy.org">Bottle</a>, <a class="tag" href="http://tornadoweb.org">Tornado</a>, <a href="http://flask.pocoo.org/" class="tag">Flask</a>, <a class="tag" href="http://www.web2py.com/">web2py</a></span></li>
    <li><b>GUI Development</b>:
        <span class="tag-wrapper"><a class="tag" href="http://www.wxpython.org/">wxPython</a>, <a class="tag" href="http://wiki.python.org/moin/TkInter">tkInter</a>, <a class="tag" href="http://www.pygtk.org">PyGtk</a>, <a class="tag" href="https://wiki.gnome.org/Projects/PyGObject">PyGObject</a>, <a class="tag" href="http://www.riverbankcomputing.co.uk/software/pyqt/intro">PyQt</a></span></li>
    <li><b>Scientific and Numeric</b>:
        <span class="tag-wrapper">
<a class="tag" href="http://www.scipy.org">SciPy</a>, <a class="tag" href="http://pandas.pydata.org/">Pandas</a>, <a href="http://ipython.org" class="tag">IPython</a></span></li>
    <li><b>Software Development</b>:
        <span class="tag-wrapper"><a class="tag" href="http://buildbot.net/">Buildbot</a>, <a class="tag" href="http://trac.edgewall.org/">Trac</a>, <a class="tag" href="http://roundup.sourceforge.net/">Roundup</a></span></li>
    <li><b>System Administration</b>:
        <span class="tag-wrapper"><a class="tag" href="http://www.ansible.com">Ansible</a>, <a class="tag" href="http://www.saltstack.com">Salt</a>, <a class="tag" href="https://www.openstack.org">OpenStack</a></span></li>
</ul>

                        </div><!-- end .shrubbery -->
                    </div>

                </div>

                
                <div class="pep-widget">

                    <h2 class="widget-title">
                        <span class="prompt">&gt;&gt;&gt;</span> <a href="/dev/peps/">Python Enhancement Proposals<span class="say-no-more"> (PEPs)</span></a>: The future of Python<span class="say-no-more"> is discussed here.</span>
                        <a aria-hidden="true" class="rss-link" href="/dev/peps/peps.rss"><span class="icon-feed"></span> RSS</a>
                    </h2>


                    
                    
                </div>

                                <div class="psf-widget">

                    <div class="python-logo"></div>
                    
                    <h2 class="widget-title">
    <span class="prompt">&gt;&gt;&gt;</span> <a href="/psf/">Python Software Foundation</a>
</h2>
<p>The mission of the Python Software Foundation is to promote, protect, and advance the Python programming language, and to support and facilitate the growth of a diverse and international community of Python programmers. <a class="readmore" href="/psf/">Learn more</a> </p>
<p class="click-these">
    <a class="button" href="/users/membership/">Become a Member</a>
    <a class="button" href="/psf/donations/">Donate to the PSF</a>
</p>
                </div>




                </section>

                
                

                
                


            </div><!-- end .container -->
        </div><!-- end #content .content-wrapper -->

        <!-- Footer and social media list -->
        <footer id="site-map" class="main-footer" role="contentinfo">
            <div class="main-footer-links">
                <div class="container">

                    
                    <a id="back-to-top-1" class="jump-link" href="#python-network"><span aria-hidden="true" class="icon-arrow-up"><span>&#9650;</span></span> Back to Top</a>

                    

<ul class="sitemap navigation menu do-not-print" role="tree" id="container">
    
    <li class="tier-1 element-1">
        <a href="/about/" >About</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/about/apps/" title="">Applications</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/about/quotes/" title="">Quotes</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/about/gettingstarted/" title="">Getting Started</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/about/help/" title="">Help</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-2">
        <a href="/downloads/" >Downloads</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/downloads/" title="">All releases</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/downloads/source/" title="">Source code</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/downloads/windows/" title="">Windows</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/downloads/mac-osx/" title="">Mac OS X</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="/download/other/" title="">Other Platforms</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="https://docs.python.org/3/license.html" title="">License</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="/download/alternatives" title="">Alternative Implementations</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-3">
        <a href="/doc/" >Documentation</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/doc/" title="">Docs</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/doc/av" title="">Audio/Visual Talks</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="https://wiki.python.org/moin/BeginnersGuide" title="">Beginner&#39;s Guide</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="https://docs.python.org/devguide/" title="">Developer&#39;s Guide</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="https://docs.python.org/faq/" title="">FAQ</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="http://wiki.python.org/moin/Languages" title="">Non-English Docs</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="http://python.org/dev/peps/" title="">PEP Index</a></li>
    
        <li class="tier-2 element-8" role="treeitem"><a href="https://wiki.python.org/moin/PythonBooks" title="">Python Books</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-4">
        <a href="/community/" >Community</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/community/diversity/" title="">Diversity</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/community/irc/" title="">IRC</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/community/lists/" title="">Mailing Lists</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/community/workshops/" title="">Python Conferences</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="/community/sigs/" title="">Special Interest Groups</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="https://wiki.python.org/moin/" title="">Python Wiki</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="/community/logos/" title="">Python Logo</a></li>
    
        <li class="tier-2 element-8" role="treeitem"><a href="/community/merchandise/" title="">Merchandise</a></li>
    
        <li class="tier-2 element-9" role="treeitem"><a href="/community/awards" title="">Community Awards</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-5">
        <a href="/about/success/" title="success-stories">Success Stories</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/about/success/#arts" title="">Arts</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/about/success/#business" title="">Business</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/about/success/#education" title="">Education</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/about/success/#engineering" title="">Engineering</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="/about/success/#government" title="">Government</a></li>
    
        <li class="tier-2 element-6" role="treeitem"><a href="/about/success/#scientific" title="">Scientific</a></li>
    
        <li class="tier-2 element-7" role="treeitem"><a href="/about/success/#software-development" title="">Software Development</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-6">
        <a href="/blogs/" title="News from around the Python world">News</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/blogs/" title="Python Insider Blog Posts">Python News</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="http://planetpython.org/" title="Planet Python">Community News</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="http://pyfound.blogspot.com/" title="PSF Blog">PSF News</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="http://pycon.blogspot.com/" title="PyCon Blog">PyCon News</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-7">
        <a href="/events/" >Events</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="/events/python-events/" title="">Python Events</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="/events/python-user-group/" title="">User Group Events</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="/events/python-events/past/" title="">Python Events Archive</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="/events/python-user-group/past/" title="">User Group Events Archive</a></li>
    
        <li class="tier-2 element-5" role="treeitem"><a href="https://wiki.python.org/moin/PythonEventsCalendar#Submitting_an_Event" title="">Submit an Event</a></li>
    
</ul>

        
    </li>
    
    <li class="tier-1 element-8">
        <a href="/dev/" >Contributing</a>
        
            

<ul class="subnav menu">
    
        <li class="tier-2 element-1" role="treeitem"><a href="http://docs.python.org/devguide/" title="">Developer&#39;s Guide</a></li>
    
        <li class="tier-2 element-2" role="treeitem"><a href="http://bugs.python.org/" title="">Issue Tracker</a></li>
    
        <li class="tier-2 element-3" role="treeitem"><a href="https://mail.python.org/mailman/listinfo/python-dev" title="">python-dev list</a></li>
    
        <li class="tier-2 element-4" role="treeitem"><a href="http://pythonmentors.com/" title="">Core Mentorship</a></li>
    
</ul>

        
    </li>
    
</ul>


                    <a id="back-to-top-2" class="jump-link" href="#python-network"><span aria-hidden="true" class="icon-arrow-up"><span>&#9650;</span></span> Back to Top</a>
                    

                </div><!-- end .container -->
            </div> <!-- end .main-footer-links -->

            <div class="site-base">
                <div class="container">
                    
                    <ul class="footer-links navigation menu do-not-print" role="tree">
                        <li class="tier-1 element-1"><a href="/about/help/">Help &amp; <span class="say-no-more">General</span> Contact</a></li>
                        <li class="tier-1 element-2"><a href="/community/diversity/">Diversity <span class="say-no-more">Initiatives</span></a></li>
                        <li class="tier-1 element-3"><a href="https://github.com/python/pythondotorg/issues">Submit Website Bug</a></li>
                        <!--<li class="tier-1 element-3"><a href="#"><span class="say-no-more">Website</span> Colophon</a></li>-->
                    </ul>

                    <div class="copyright">
                        <p><small>
                            <span class="pre">Copyright &copy;2001-2015.</span>
                            &nbsp;<span class="pre"><a href="/psf-landing/">Python Software Foundation</a></span>
                            &nbsp;<span class="pre"><a href="/about/legal/">Legal Statements</a></span>
                            &nbsp;<span class="pre"><a href="/privacy/">Privacy Policy</a></span>
                        </small></p>
                    </div>

                </div><!-- end .container -->
            </div><!-- end .site-base -->

        </footer>

    </div><!-- end #touchnav-wrapper -->

    
    <script src="//ajax.googleapis.com/ajax/libs/jquery/1.8.2/jquery.min.js"></script>
    <script>window.jQuery || document.write('<script src="/static/js/libs/jquery-1.8.2.min.js"><\/script>')</script>

    <script src="/static/js/libs/masonry.pkgd.min.js"></script>

    <script type="text/javascript" src="/static/js/main-min.js" charset="utf-8"></script>
    

    <!--[if lte IE 7]>
    <script type="text/javascript" src="/static/js/plugins/IE8-min.js" charset="utf-8"></script>
    
    
    <![endif]-->

    <!--[if lte IE 8]>
    <script type="text/javascript" src="/static/js/plugins/getComputedStyle-min.js" charset="utf-8"></script>
    
    
    <![endif]-->

    

    
    

</body>
</html>


In [1]:
f = open('gov/51099604_1026604961.txt', 'r')
response_text = f.read()
f.close()


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-1-cfa578ace398> in <module>()
----> 1 f = open('gov/51099604_1026604961.txt', 'r')
      2 response_text = f.read()
      3 f.close()

IOError: [Errno 2] No such file or directory: 'gov/51099604_1026604961.txt'

In [2]:
from bs4 import BeautifulSoup
res = BeautifulSoup(response_text)
#print res.text

In [3]:
#for th in res.select('th'):
    #print th
    #print th.text.strip()

In [13]:
for tr in res.select('tr'):
    th = tr.select('th')
    if len(th) > 0:
        #print tr.select('th')[0].text.strip()
        if tr.select('th')[0].text.strip().encode('utf-8') == '機關代碼':
            #print tr
            print tr.select('td')[0].text.strip()


3.13.30.20

In [5]:
import requests
res = requests.get('http://www.comicvip.com/show/cool-103.html?ch=800')
print res.text


<META http-equiv="Content-Type" content="text/html; charset=big5"><title>®ü¸é¤ý 1 - 800  ®ü¸é¤ýº©µe½u¤WÆ[¬Ý µL­­°Êº© 8comic.com</title><LINK href=/css/global.css type=text/css rel=stylesheet><META name=keywords content="free comic,online comic,free anime download,free cartoon download,®ü¸é¤ý,¦b½uº©µe,½u¤WÆ[¬Ý,§K¶Oº©µe,º©µe¤U¸ü,º©µe¹Ï¤ù,°Êµe¤U¸ü,°Êµe,º©µe,¥d³q,°Êº©µL­­"><META name=description content="®ü¸é¤ý§K¶Oº©µe½u¤WÆ[¬Ý - free online comic"><head></head>
<script>if(document.location.href.toLowerCase().indexOf("best-manga-104.html?ch=618")>0) document.location.href="temp104.html?ch=618";</script>
<TABLE width=100% border=0 align=center cellPadding=0 cellSpacing=0 bgcolor=#000000><TR><TD width=10% colspan=2 bgcolor=#000000><a href=/ target=_top><img src=/images/logos.gif name="logo" border=0 align=absmiddle id=logo></a></TD>
<TD style="background:url(/images/tbg.jpg);background-repeat:no-repeat;"><script src=/js/top.js></script></TD><TD width=10% align=right valign=bottom nowrap="nowrap"><a href=/msg><img src=/msg/images/newmsg0.gif border=0 align=absmiddle></a> <a href=/msg id=Msg2 class=w>º©µe³ø¿ù </a><img src=/images/vline.gif border=0 align=absmiddle/> <a href=/member/favourite.aspx class=w>¦¬Â稾ã²z</a> <img src=/images/vline.gif border=0 align=absmiddle/> <a href=/comic/u-1.html class=w>¨C¤é§ó·sº©µe¦Cªí</a> <img src=/images/vline.gif border=0 align=absmiddle/><a href=/comic/all.html class=w>¥þ¯¸º©µe¦Cªí</a> &nbsp;</TD></TR><TR><TD colspan=4 background="/images/menubg2.gif" bgcolor="#CCCCCC" ><TABLE width=100% border=0 align=center cellPadding=0 cellspacing=0><TR align="center"><th height="40" >&nbsp;</th><td><a href=/comic/1-1.html>®æ°«</a></td><th><img src=/images/l.gif></th><td><a href=/comic/6-1.html>«_ÀI</a></td><th><img src=/images/l.gif></th><td><a href=/comic/5-1.html>ĵ±´</a></td><th><img src=/images/l.gif></th><td><a href=/comic/4-1.html>¾Ô°ê</a></td><th><img src=/images/l.gif /></th><td><a href=/comic/2-1.html>Äv§Þ</a></td><th><img src=/images/l.gif /></th><td><a href=/comic/16-1.html>¦ë¶¹</a></td><th><img src=/images/l.gif /></th><td><a href=/comic/7-1.html>¾÷±ñ</a></td><th><img src=/images/l.gif /></th><td><a href=/comic/22-1.html>¬ì¤Û</a></td><th><img src=/images/l.gif></th><td><a href=/comic/3-1.html>Å]ªk</a></td><th><img src=/images/l.gif></th><td><a href=/comic/8-1.html>§¯Å]</a></td><th><img src=/images/l.gif></th><td><a href=/comic/19-1.html>®£©Æ</a></td><th><img src=/images/l.gif></th><td><a href=/comic/21-1.html>¾·~</a></td><th><img src=/images/l.gif></th><td><a href=/comic/9-1.html>´ä²£</a></td><th><img src=/images/l.gif></th><td><a href=/comic/11-1.html>¤Ö¤k</a></td><th><img src=/images/l.gif></th><td><a href=/comic/12-1.html>¤Ö¨k</a></td><th><img src=/images/l.gif></th><td><a href=/comic/15-1.html>¶W¤k</a></td><th><img src=/images/l.gif></th><td><a href=/comic/13-1.html>·d¯º</a></td><th><img src=/images/l.gif></th><td><a href=/comic/14-1.html>®Õ¶é</a></td><th><img src=/images/l.gif></th><td><a href=/comic/18-1.html>µu½g</a></td><th><img src=/images/l.gif></th><td><a href=/comic/20-1.html>¨ä¥L</a></td><th align="right">&nbsp;</th></TR></TABLE></TD></TR></TABLE><script src="/js/nview.js"></script><form method="post" action="#" id="Form1">
<div class="aspNetHidden">
<input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTMwMDQyMTI1NmRksmOdHDYdo09eoeFgIU99wK+xiy7MZASgFvsMBA0VztE=" />
</div>

<div class="aspNetHidden">

	<input type="hidden" name="__VIEWSTATEGENERATOR" id="__VIEWSTATEGENERATOR" value="3CB179A0" />
</div><table width=97% border=0 align=center cellpadding=0 cellspacing=0><tr><td width=90% nowrap=nowrap height=20><script src="/js/topview.js"></script></td></tr></table><table width=97% border=0 align=center cellpadding=0 cellspacing=0><tr><td align=center><table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr>
<td height="10"></td>
</tr>
<tr>
<td align="center">
<script type="text/javascript"><!--
google_ad_client = "ca-pub-4955475422456028";
/* 8C_970X90 */
google_ad_slot = "8296529878";
google_ad_width = 970;
google_ad_height = 90;
//-->
</script>
<script type="text/javascript"
src="//pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</td>
</tr>
</table>
<table width="100%" height="10" border="0" cellpadding="0" cellspacing="0">
<tr>
<td></td>
</tr>
</table></td></tr></table><table width=97% height=18 border=0 align=center cellpadding=0 cellspacing=0 background=/images/barc.gif><tr><td width=20% nowrap=nowrap><b> <font color=#EE3366><img src=/images/barl.gif align=absmiddle />¥¿¦bÆ[¬Ý:[ ®ü¸é¤ý <font id=lastchapter>800</font> ]</font></b></td><td width=10% align=left nowrap=nowrap><a href=# onClick=pv();return false; id=prevvol ><font face=webdings>7</font> ¤W¶°: <font id="prevname"></font></a>&nbsp; </td><td width=10% align=right nowrap=nowrap><a href=# onClick=jp();return false; id=prev ><b><font color=blue>[ <font face=webdings>3</font> ¤W ­¶ ]</font></b></a></td><td width=10% height=1% align=center nowrap=nowrap id=nav><select name=select id=pageindex onChange=j(this.value) style="font-size:10px;font-family:arial"></select></td><td width=10% align=left nowrap=nowrap><a href=# onClick=jn();return false; id=next><b><font color=blue>[ ¤U ­¶<font face=webdings>4</font> ]</font></b></a></td><td width=10% align=right nowrap=nowrap><a href=# onClick=nv();return false; id=nextvol >¤U¶°: <font id="nextname"></font><font face=webdings>8</font></a></td><td width=24% align=right nowrap=nowrap><a href=# onclick=lv();return false; id=lastvol class=red ><b>³Ì·s:[®ü¸é¤ý 800 ]<font color=#0099CC></font></b></a><img src=/images/barr.gif align=absmiddle /></td>  </tr></table><table width=97% border=0 align=center cellpadding=0 cellspacing=0><tr><td align=center><table height="2" border="0" cellpadding="0" cellspacing="0"><tr><td> </td></tr></table>
<div class="sitemajidr" model="728x90" position="top"></div>
<script type="text/javascript" src="http://ad.sitemaji.com/ysm_8comic.js"></script>
<table height="10" border="0" cellpadding="0" cellspacing="0"><tr><td> </td></tr></table></td></tr></table><table width=97% border=0 align=center id=TheTable cellpadding=0 cellspacing=0><tr><td align=center><img name=TheImg  border=0 id=TheImg /></td></tr></table><table width=97%  align=center cellpadding=0 cellspacing=0  ><tr><td height=5 align=center></td></tr><tr><td align=center><div class="sitemajiad" model="728x90" position="bottom"></div>
<script type="text/javascript" src="http://ad.sitemaji.com/ysm_8comic.js"></script>
<table height="1" border="0" cellpadding="0" cellspacing="0"><tr><td> </td></tr></table></td></tr><tr><td height=15 align=center id=prevnext2><a href=# onClick=jp();return false; id=prev2 style=line-height:14px><font color=blue><b>[ ¤W ­¶ ]</b></font></a>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<a href=# onClick=jn();return false; id=next2 style=line-height:14px><font color=blue><b>[ ¤U ­¶ ]</b></font></a></td></tr><tr><td align=center><table width="100%" border="0" align="center" cellpadding="0" cellspacing="0">
<tr>
<td height="4"></td>
</tr>
<tr>
<td align="center">
<script type="text/javascript"><!--
google_ad_client = "ca-pub-4955475422456028";
/* 8C_970X250 */
google_ad_slot = "5343114118";
google_ad_width = 970;
google_ad_height = 250;
//-->
</script>
<script type="text/javascript"
src="//pagead2.googlesyndication.com/pagead/show_ads.js">
</script>
</td>
</tr>
</table>
<br>
<br>
<br>
<br>
<br>
</td></tr></table><table width=97% border=0 align=center cellpadding=0 cellspacing=0 background=/images/barc.gif><tr><td><img src=/images/barl.gif align=absmiddle /></td><td width=5% align="right" nowrap=nowrap><b><font color=#EE3366></font></b>Âê©w¤ñ¨Ò¡G</td><td nowrap=nowrap width=15%><img id=za style=CURSOR: hand onclick=zoomaa(this) alt=¹ê»Ú¤j¤p hspace=2 src=/images/zooma.gif align=absmiddle/><img id=zf style= CURSOR: hand onclick=zoomff(this) hspace=4 src=/images/zoomf.gif align=absmiddle/></td><td width=20% height=1% align=center nowrap=nowrap>«ö[<font color=#FF0000>a</font>] 100%Åã¥Ü «ö [<font color=#FF0000>z</font>] ¾AÀ³¤j¤p</td><td width=20% align=center nowrap=nowrap id="pagenum"></td><td width=20% align=center nowrap=nowrap><font color=#0066CC>[«öªÅ®æ:¥ª¥k²¾°Ê]</font>&nbsp;</td><td nowrap=nowrap align=right width=20%>&nbsp;<a href=javascript:window.close();>[ Ãö³¬ ]</a>&nbsp;&nbsp;&nbsp;<a href=#><img src=/images/top.gif hspace=2 border=0 align=absmiddle /></a></td><td nowrap=nowrap align=right><img src=/images/barr.gif align=absmiddle /></td></tr></table><script>var chs=800;var ti=103;var cs='';eval(unescape('sp%28%29;'));</script><table cellspacing=0 cellpadding=5 width=97% align=center border=0><tr><td width=80% height=1% nowrap=nowrap><table border=0 align=center cellpadding=4 cellspacing=0><tr><td align=center><script src="/js/bottom.js"></script></td></tr></table></td></tr></table><table width=100% border=0 cellspacing=0 cellpadding=0 align=center style=margin-top:1px ><tr><td height=1 bgcolor=#dddddd></td></tr><tr height=20 bgcolor=#eeeeee align=center><td></td></tr><tr><td height=60 align=center nowrap=nowrap bgcolor=#eeeeee ><br><a href=http://www.2comic.com target=_blank >°Êº©©ö</a> | <a href=http://www.6comic.com target=_blank >·È·È°Êº©</a> | <a href=http://www.862.tw target=_blank >862¹CÀ¸</a> | <a href=http://www.8book.com target=_blank >µL­­¤p»¡</a> <br><br><font color=#BFBFBF>¥»¯¸©Ò¦³¹Ï¤ù§¡¨Ó¦Ûºô¸ô¸ê·½,¨Ã¥B§K¶O´£¨Ñ,¦p¦³¥ô¦ó°ÝÃD½Ð»P§Ú­ÌÁpô,½T»{«á¥»¯¸±N·|¶i¦æ­×¥¿.</font></p></td></tr></td></table></form></body><img src=http://ad.yieldmanager.com/pixel?id=1077990&t=2 width=1 height=1 /><img src=http://extern.yahoo.com/b?s=1197365499 width=1 height=1 />

Selenium

install firefox plugin

install selenium

  • pip install selenium

In [16]:
# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import time, re
from bs4 import BeautifulSoup
import shutil

# use selenium to open firefox
driver = webdriver.Firefox()

# let firefox wait 30 seconds
driver.implicitly_wait(30)

# base url
base_url = "http://www.comicvip.com/show/cool-103.html?ch=800"

# sasve comic function
def savecomic(soup):
    for img in soup.select('img'):
        if 'jpg' in img.get('src'):
            comic_link = img.get('src')
            res = requests.get(comic_link, stream=True)
            f = open(comic_link.split('/')[-1], 'wb')
            shutil.copyfileobj(res.raw, f)
            f.close()
            
driver.get(base_url)
soup = BeautifulSoup(driver.page_source)
savecomic(soup)
while True:
    driver.implicitly_wait(1)    
    driver.find_element_by_css_selector("#next > b > font").click()
    soup = BeautifulSoup(driver.page_source)
    savecomic(soup)

# Close driver
driver.close()

#driver.find_element_by_css_selector("#next > b > font").click()


---------------------------------------------------------------------------
UnexpectedAlertPresentException           Traceback (most recent call last)
<ipython-input-16-bbd639d1ffb9> in <module>()
     35     driver.implicitly_wait(1)
     36     driver.find_element_by_css_selector("#next > b > font").click()
---> 37     soup = BeautifulSoup(driver.page_source)
     38     savecomic(soup)
     39 

C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.pyc in page_source(self)
    462             driver.page_source
    463         """
--> 464         return self.execute(Command.GET_PAGE_SOURCE)['value']
    465 
    466     def close(self):

C:\Python27\lib\site-packages\selenium\webdriver\remote\webdriver.pyc in execute(self, driver_command, params)
    199         response = self.command_executor.execute(driver_command, params)
    200         if response:
--> 201             self.error_handler.check_response(response)
    202             response['value'] = self._unwrap_value(
    203                 response.get('value', None))

C:\Python27\lib\site-packages\selenium\webdriver\remote\errorhandler.pyc in check_response(self, response)
    178             raise exception_class(response, message)
    179         elif exception_class == UnexpectedAlertPresentException and 'alert' in value:
--> 180             raise exception_class(message, screen, stacktrace, value['alert'].get('text'))
    181         raise exception_class(message, screen, stacktrace)
    182 

<type 'str'>: (<type 'exceptions.UnicodeEncodeError'>, UnicodeEncodeError('ascii', u"Alert Text: \u5df2\u7d93\u662f\u6700\u5f8c\u4e00\u9801\u4e86\n<super: <class 'WebDriverException'>, <UnexpectedAlertPresentException object>>", 12, 20, 'ordinal not in range(128)'))

In [ ]:
from seleniumrequests import Firefox
from bs4 import BeautifulSoup

webdriver = Firefox()
response = webdriver.request('GET', 'http://www.comicvip.com/show/cool-103.html?ch=800')
soup = BeautifulSoup(response.text)
for img in soup.select('img'):
    print img

In [8]:
import requests
import json
res = requests.get('https://api.muzikair.com/v3/air/play/px2gg8?page=webpage_all&keywords=&count=2&lang=tw')
#print res.text
jd = json.loads(res.text)
playlink = jd['data']['url']['track']
print playlink
res2 = requests.get(playlink, stream=True)
import shutil
f = open('x.mp3', 'wb')
shutil.copyfileobj(res2.raw, f)
f.close()


https://play.muzik-online.com/v3/play/px2gg8?token=K1%2FXK%2BylFLCJad9dyk3ls57m1GkdiXbkp5g%3D

In [13]:
import requests

from bs4 import BeautifulSoup
#import os
#os.mkdir('gov')
res = requests.get('http://web.pcc.gov.tw/tps/main/pms/tps/atm/atmAwardAction.do?newEdit=false&searchMode=common&method=inquiryForPublic&pkAtmMain=51099604&tenderCaseNo=1026604961')
soup = BeautifulSoup(res.text, "html.parser")
f = open('gov/51099604_1026604961.txt', 'w')
printarea = soup.select('#printArea')[0].prettify('utf-8')
f.write(printarea)
f.close()

f = open('gov/51099604_1026604961.txt', 'r')
response_text = f.read()
f.close()

In [22]:
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') == '機關代碼':
            #print tr.select('th')[0].text.strip().encode('utf-8')
            print tr.select('td')[0].text.strip().encode('utf-8')


3.13.30.20

In [ ]:
<tr class="award_table_tr_1">
		<th valign="middle" bgcolor="#DAEBED" class="T11b" width="200" align="left">機關代碼</th>
		<td bgcolor="#EFF1F1" class="newstop" height="25">
			3.13.30.20
		</td>
</tr>

In [23]:
s = " \t string test \t \n\n\r" ' \tstring test \t \n\n\r'
print s


 	 string test 	 

 	string test 	 



In [24]:
print s.strip()


string test 	 

 	string test

In [25]:
print s.rstrip()


 	 string test 	 

 	string test

In [26]:
print s.lstrip()


string test 	 

 	string test 	 



In [28]:
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') == '機關代碼':
            print tr.select('td')[0].text.strip().encode('utf-8')
        if tr.select('th')[0].text.strip().encode('utf-8') == '機關名稱':
            print tr.select('td')[0].text.strip().encode('utf-8')
        if tr.select('th')[0].text.strip().encode('utf-8') == '單位名稱':
            print tr.select('td')[0].text.strip().encode('utf-8')


3.13.30.20
臺灣糖業股份有限公司休閒遊憩事業部
臺灣糖業股份有限公司休閒遊憩事業部

In [29]:
dic = {'a':100, 'b':"yes", 'c':0.98}
print dic


{'a': 100, 'c': 0.98, 'b': 'yes'}

In [30]:
#get keys in dictionary
print dic.keys()
#get values in dictionary
print dic.values()


['a', 'c', 'b']
[100, 0.98, 'yes']

In [31]:
#get value of given key
print dic['a']
#get value of given key
print dic.get('a')


100
100

In [32]:
#get value of given key
print dic['d']


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-32-73f92c082065> in <module>()
      1 #get value of given key
----> 2 print dic['d']

KeyError: 'd'

In [36]:
#get value of given key
print dic.get('d')
print dic.get('d', 'qoo')
print dic.get('a', 'qoo')


None
qoo
100

In [39]:
dic['d'] = 'qoo'
print dic

qoo = {'e':1., 'f':99}
dic.update(qoo)


{'a': 100, 'c': 0.98, 'b': 'yes', 'd': 'qoo'}

In [40]:
print dic


{'a': 100, 'c': 0.98, 'b': 'yes', 'e': 1.0, 'd': 'qoo', 'f': 99}

In [44]:
dic = {"機關代碼":"","機關名稱":"","機關地址":"", "聯絡人":""}
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') in dic:
            print tr.select('th')[0].text.strip().encode('utf-8'),
            print tr.select('td')[0].text.strip().encode('utf-8')


機關代碼 3.13.30.20
機關名稱 臺灣糖業股份有限公司休閒遊憩事業部
機關地址 717臺南市仁德區文華路2段326號
聯絡人 吳政德

In [45]:
dic = {"機關代碼":"","機關名稱":"","機關地址":"", "聯絡人":""}
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') in dic:
            dic[tr.select('th')[0].text.strip().encode('utf-8')] = tr.select('td')[0].text.strip().encode('utf-8')

In [49]:
print dic
for key in dic:
    print key, dic[key]


{'\xe8\x81\xaf\xe7\xb5\xa1\xe4\xba\xba': '\xe5\x90\xb3\xe6\x94\xbf\xe5\xbe\xb7', '\xe6\xa9\x9f\xe9\x97\x9c\xe5\x90\x8d\xe7\xa8\xb1': '\xe8\x87\xba\xe7\x81\xa3\xe7\xb3\x96\xe6\xa5\xad\xe8\x82\xa1\xe4\xbb\xbd\xe6\x9c\x89\xe9\x99\x90\xe5\x85\xac\xe5\x8f\xb8\xe4\xbc\x91\xe9\x96\x92\xe9\x81\x8a\xe6\x86\xa9\xe4\xba\x8b\xe6\xa5\xad\xe9\x83\xa8', '\xe6\xa9\x9f\xe9\x97\x9c\xe4\xbb\xa3\xe7\xa2\xbc': '3.13.30.20', '\xe6\xa9\x9f\xe9\x97\x9c\xe5\x9c\xb0\xe5\x9d\x80': '717\xe8\x87\xba\xe5\x8d\x97\xe5\xb8\x82\xe4\xbb\x81\xe5\xbe\xb7\xe5\x8d\x80\xe6\x96\x87\xe8\x8f\xaf\xe8\xb7\xaf2\xe6\xae\xb5326\xe8\x99\x9f'}
聯絡人 吳政德
機關名稱 臺灣糖業股份有限公司休閒遊憩事業部
機關代碼 3.13.30.20
機關地址 717臺南市仁德區文華路2段326號

In [59]:
dic = {"標的分類":""}
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') in dic:
            print tr.select('td')[0].text.strip().encode('utf-8')


<勞務類>
     
      96
     

      娛樂,文化,體育服務
     

      核准人職稱姓名:執行長黃進良
      
       更改人職稱姓名:管理師吳政德
       
        核准日期:103/04/28
        
         核准文號:簽報單位主管核准在案

In [60]:
a ='\t\t string \n\t'
print a


		 string 
	

In [61]:
print repr(a)


'\t\t string \n\t'

In [62]:
dic = {"標的分類":""}
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') in dic:
            print repr(tr.select('td')[0].text.strip().encode('utf-8'))


'<\xe5\x8b\x9e\xe5\x8b\x99\xe9\xa1\x9e>\n     \n      96\n     \n\n      \xe5\xa8\x9b\xe6\xa8\x82,\xe6\x96\x87\xe5\x8c\x96,\xe9\xab\x94\xe8\x82\xb2\xe6\x9c\x8d\xe5\x8b\x99\n     \n\n      \xe6\xa0\xb8\xe5\x87\x86\xe4\xba\xba\xe8\x81\xb7\xe7\xa8\xb1\xe5\xa7\x93\xe5\x90\x8d\xef\xbc\x9a\xe5\x9f\xb7\xe8\xa1\x8c\xe9\x95\xb7\xe9\xbb\x83\xe9\x80\xb2\xe8\x89\xaf\n      \n       \xe6\x9b\xb4\xe6\x94\xb9\xe4\xba\xba\xe8\x81\xb7\xe7\xa8\xb1\xe5\xa7\x93\xe5\x90\x8d\xef\xbc\x9a\xe7\xae\xa1\xe7\x90\x86\xe5\xb8\xab\xe5\x90\xb3\xe6\x94\xbf\xe5\xbe\xb7\n       \n        \xe6\xa0\xb8\xe5\x87\x86\xe6\x97\xa5\xe6\x9c\x9f\xef\xbc\x9a103/04/28\n        \n         \xe6\xa0\xb8\xe5\x87\x86\xe6\x96\x87\xe8\x99\x9f\xef\xbc\x9a\xe7\xb0\xbd\xe5\xa0\xb1\xe5\x96\xae\xe4\xbd\x8d\xe4\xb8\xbb\xe7\xae\xa1\xe6\xa0\xb8\xe5\x87\x86\xe5\x9c\xa8\xe6\xa1\x88'

In [67]:
a = '123,456,789,qoo'
print a.split(',')
print a.split(',',1)
print a.split(',',2)


['123', '456', '789', 'qoo']
['123', '456,789,qoo']
['123', '456', '789,qoo']

In [74]:
c = ['1','2','3','4']
print '|'.join(c)

a = '123                        21333333333333333333333                    999'
print a.split()

print ' '.join(a.split())
print '|'.join(a.split())


1|2|3|4
['123', '21333333333333333333333', '999']
123 21333333333333333333333 999
123|21333333333333333333333|999

In [76]:
dic = {"標的分類":""}
soup2 = BeautifulSoup(response_text)
for tr in soup2.select('tr'):
    if len(tr.select('th')) > 0:
        if tr.select('th')[0].text.strip().encode('utf-8') in dic:
            print ' '.join(tr.select('td')[0].text.strip().encode('utf-8').split())


<勞務類> 96 娛樂,文化,體育服務 核准人職稱姓名:執行長黃進良 更改人職稱姓名:管理師吳政德 核准日期:103/04/28 核准文號:簽報單位主管核准在案

In [83]:
from datetime import date,datetime
currenttime = datetime.now()
print type(currenttime)
print type(currenttime.strftime("%Y-%m-%d"))
print currenttime.strftime("%Y-%m-%d")
print currenttime.strftime("%Y-%m-%d %H:%M:%S")


a = '2014-05-03 14:00'
print datetime.strptime(a, "%Y-%m-%d %H:%M")
print type(datetime.strptime(a, "%Y-%m-%d %H:%M"))



#t = '102/11/05 10:00'


<type 'datetime.datetime'>
<type 'str'>
2015-09-18
2015-09-18 13:59:51
2014-05-03 14:00:00
<type 'datetime.datetime'>

In [90]:
t = '102/11/05 10:00'
getyear =  t.split('/', 1)
print int(getyear[0]) + 1911
adtime =  '/'.join([str(int(getyear[0]) + 1911), getyear[1]])
print datetime.strptime(adtime, '%Y/%m/%d %H:%M')
print type(datetime.strptime(adtime, '%Y/%m/%d %H:%M'))


2013
2013-11-05 10:00:00
<type 'datetime.datetime'>

In [91]:
import re
m = re.match( r"([0-9,]+)元", '6,288,452元')
print ''.join(m.group(1).split(','))


6288452

In [108]:
email1 = 'david@iii.com'
import re
m = re.match('(\w+)@([\w\.]+)', email1)
#print m.group(1)
#print m.group(2)



email_list = ['david@iii.com', 'qoo@oop.com', '12313213$999.com']
for email in email_list:
    m = re.match('(\w+)@([\w\.]+)', email)
    if m:
        print "username:", m.group(1)

phone = '0912345678'
m = re.match('\d{10}', phone)
print m


phone_list = ['0912345678', '0912-345-678', '0912-345678', '10238018290829085024']
for phone in phone_list:
    m = re.match('(\d+)', phone)
    if m:
        print "phone:", m.group(1)

print '============================'
for phone in phone_list:
    m = re.match('(\d{4}-\d+)', phone)
    if m:
        print "phone:", m.group(1)
print '============================'
for phone in phone_list:
    m = re.match('(\d{4}-{0,1}\d+)', phone)
    if m:
        print "phone:", m.group(1)
print '============================'
for phone in phone_list:
    m = re.match('(\d{4}-?\d+)', phone)
    if m:
        print "phone:", m.group(1)
print '============================'
for phone in phone_list:
    m = re.match('(\d{4}-?\d{3}-?\d{3})', phone)
    if m:
        print "phone:", m.group(1)
print '============================'
for phone in phone_list:
    m = re.match('(^\d{4}-?\d{3}-?\d{3}$)', phone)
    if m:
        print "phone:", m.group(1)


username: david
username: qoo
<_sre.SRE_Match object at 0x05726410>
phone: 0912345678
phone: 0912
phone: 0912
phone: 10238018290829085024
============================
phone: 0912-345
phone: 0912-345678
============================
phone: 0912345678
phone: 0912-345
phone: 0912-345678
phone: 10238018290829085024
============================
phone: 0912345678
phone: 0912-345
phone: 0912-345678
phone: 10238018290829085024
============================
phone: 0912345678
phone: 0912-345-678
phone: 0912-345678
phone: 1023801829
============================
phone: 0912345678
phone: 0912-345-678
phone: 0912-345678

In [125]:
import requests
res = requests.get('http://ecapi.pchome.com.tw/ecshop/prodapi/v2/prod/DGBHAB-19006DLL1&fields=Seq,Id,Name,Nick,Store,PreOrdDate,SpeOrdDate,Price,Discount,Pic,Weight,ISBN,Qty,Bonus,isBig,isSpec,isCombine,isDiy,isRecyclable,isCarrier,isMedical,isBigCart,isSnapUp,isDescAndIntroSync,isFoodContents,isHuge&_callback=jsonpcb_prodecshop?_callback=jsonpcb_prodecshop')
#print res.text


import re
m = re.match('.+"P":(\d+)},.*',res.text)
#print m.group(1)


import re
m = re.search('"P":(\d+)},',res.text)
#print m.group(1)

import re
m = re.search('jsonpcb_prodecshop\((.*?)\);', res.text)
#print m.group(1)

import json
jd = json.loads(m.group(1))
print jd['DGBHAB-19006DLL1-000']['Price']['P']


28900

In [128]:
a = '我從口袋中撿到200元 超爽der 他只剩30元'
import re
m = re.search('(\d+)元', a)
print m.group(1)

import re
m = re.findall('(\d+)元', a)
print m


200
['200', '30']

In [133]:
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
from datetime import datetime
import os, sys, re

def get_response_element (file_name):
    f = open(file_name, 'r')
    response_text = f.read()
    f.close()
    soup = BeautifulSoup(response_text)
    tender_table = soup.select('.table_block.tender_table')[0]
    tr = tender_table.select('tr')
    return tr

#print get_response_element('gov/51099604_1026604961.txt')

In [135]:
def date_conversion(element):
    m = re.match(r"(\d+/\d+/\d+)",element)
    if m is not None:
        t = m.group(1).split('/', 1)
        if t[0] != '':
            return datetime.strptime(str(int(t[0]) + 1911) + "/" + (''.join(t[1:]).split()[0] ), "%Y/%m/%d")
    else:
        return None
    
print date_conversion('102/11/10')
print date_conversion('102/11/10 13:00')


2013-11-10 00:00:00
2013-11-10 00:00:00

In [148]:
def money_conversion(element):
    m = re.match( r"\$?(-?[0-9,]+)", element)
    return int(''.join(m.group(0).split(',')))

#print money_conversion('123,222,444')
#print money_conversion('123,222,444')
print money_conversion('-123,222,444元')


-123222444

In [150]:
def remove_space(element):
    return "".join(element.split())

print remove_space('     dsf dsf            dsfdsfdsf')


dsfdsfdsfdsfdsf

In [161]:
name_map = {"機關代碼":"entity_code", "機關名稱":"procuring_entity","標案案號":"job_number","招標方式":"procurement_type",\
            "決標方式":"tender_awarding_type","標案名稱":"subject_of_procurement", "決標資料類別":"attr_of_tender_awarding", \
            "標的分類":"attr_of_procurement", "預算金額":"budget_value", "開標時間":"opening_date","決標公告日期":"tender_awarding_announce_date",\
            "歸屬計畫類別":"project_type","總決標金額":"total_tender_awarding_value","底價金額":"floor_price_value",\
            "決標日期":"tender_awarding_date", "pkAtmMain":"pkAtmMain"} 
            
tender_awards_map = {"機關代碼":remove_space, "機關名稱":remove_space, "標案案號":remove_space, \
                     "招標方式":remove_space,"決標方式":remove_space,"標案名稱":remove_space, "決標資料類別":remove_space, \
                     "標的分類":remove_space, "預算金額":money_conversion, "開標時間":date_conversion,"歸屬計畫類別":remove_space, \
                        "總決標金額":money_conversion,"底價金額":money_conversion,"決標日期":date_conversion, "決標公告日期":date_conversion}

In [162]:
def a(str1):
    return 'qoo' + str1

def b(str1):
    return 'oop' + str1

func_map = {'a': a, 'b':b}
print func_map['a']('hello')
print func_map['b']('hello')


qoohello
oophello

In [163]:
def get_award_info_dic (element):
    returned_dic = {}
    for row in element:
        th = row.select('th')
        if len(th) > 0:
            th_name = th[0].text.encode('utf-8').strip()
            if ('award_table_tr_1' or \
                'award_table_tr_2' or \
                'award_table_tr_6' in row.get('class')) \
                 and (th_name in tender_awards_map):
                returned_dic[name_map[th_name]] = \
                    tender_awards_map[th_name](row.select('td')[0].text.strip())
    return returned_dic

In [166]:
tr_ele =  get_response_element('gov/51099604_1026604961.txt')
return_dic =  get_award_info_dic(tr_ele)

In [167]:
for key in return_dic:
    print key, return_dic[key]


opening_date 2013-11-05 00:00:00
total_tender_awarding_value 6263804
tender_awarding_date 2013-11-11 00:00:00
entity_code 3.13.30.20
procurement_type 公開招標
procuring_entity 臺灣糖業股份有限公司休閒遊憩事業部
attr_of_procurement <勞務類>96娛樂,文化,體育服務核准人職稱姓名:執行長黃進良更改人職稱姓名:管理師吳政德核准日期:103/04/28核准文號:簽報單位主管核准在案
tender_awarding_announce_date 2014-04-29 00:00:00
subject_of_procurement 烏樹林休閒廣場103年度勞務採購
project_type 非屬愛台十二項計畫
attr_of_tender_awarding 決標公告
budget_value 6288452
floor_price_value 6288452
job_number 1026604961
tender_awarding_type 最低標

In [169]:
a= '\t'
print a
b = r'\t'
print b


	
\t

In [170]:
# -*- coding: utf-8 -*-
import sqlite3
db = sqlite3.connect('tender.sqlite')
#cur = db.cursor()

In [171]:
cur = db.cursor()
cur.execute('''CREATE TABLE IF NOT EXISTS Tender_awards(
 id INTEGER PRIMARY KEY,
 pkAtmMain TEXT,
 procuring_entity TEXT,
 entity_code TEXT,
 attr_of_procurement TEXT,
 opening_date DATETIME,
 procurement_type TEXT,
 tender_awarding_type TEXT,
 project_type TEXT,
 subject_of_procurement TEXT,
 job_number TEXT,
 budget_value BIGINTEGER,
 attr_of_tender_awarding TEXT,
 floor_price_value BIGINTEGER,
 tender_awarding_announce_date DATETIME,
 tender_awarding_date DATETIME,
 total_tender_awarding_value BIGINTEGER
 )''')
db.close()

In [172]:
tr_ele =  get_response_element('gov/51099604_1026604961.txt')
return_dic =  get_award_info_dic(tr_ele)
for key in return_dic:
    print key, return_dic[key]


opening_date 2013-11-05 00:00:00
total_tender_awarding_value 6263804
tender_awarding_date 2013-11-11 00:00:00
entity_code 3.13.30.20
procurement_type 公開招標
procuring_entity 臺灣糖業股份有限公司休閒遊憩事業部
attr_of_procurement <勞務類>96娛樂,文化,體育服務核准人職稱姓名:執行長黃進良更改人職稱姓名:管理師吳政德核准日期:103/04/28核准文號:簽報單位主管核准在案
tender_awarding_announce_date 2014-04-29 00:00:00
subject_of_procurement 烏樹林休閒廣場103年度勞務採購
project_type 非屬愛台十二項計畫
attr_of_tender_awarding 決標公告
budget_value 6288452
floor_price_value 6288452
job_number 1026604961
tender_awarding_type 最低標

In [183]:
#'insert into table(c1, c2) values("val1", "val2")'
#'insert into table(c1, c2) values(?, ?)'
#print return_dic
placeholders =  ', '.join(return_dic.keys())
values =  ', '.join('?' * len(return_dic))

skeleton = 'insert into Tender_awards({}) values({});'
print skeleton.format(placeholders, values)
#return_dic.values()


insert into Tender_awards(opening_date, total_tender_awarding_value, tender_awarding_date, entity_code, procurement_type, procuring_entity, attr_of_procurement, tender_awarding_announce_date, subject_of_procurement, project_type, attr_of_tender_awarding, budget_value, floor_price_value, job_number, tender_awarding_type) values(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
Out[183]:
[datetime.datetime(2013, 11, 5, 0, 0),
 6263804,
 datetime.datetime(2013, 11, 11, 0, 0),
 u'3.13.30.20',
 u'\u516c\u958b\u62db\u6a19',
 u'\u81fa\u7063\u7cd6\u696d\u80a1\u4efd\u6709\u9650\u516c\u53f8\u4f11\u9592\u904a\u61a9\u4e8b\u696d\u90e8',
 u'<\u52de\u52d9\u985e>96\u5a1b\u6a02,\u6587\u5316,\u9ad4\u80b2\u670d\u52d9\u6838\u51c6\u4eba\u8077\u7a31\u59d3\u540d\uff1a\u57f7\u884c\u9577\u9ec3\u9032\u826f\u66f4\u6539\u4eba\u8077\u7a31\u59d3\u540d\uff1a\u7ba1\u7406\u5e2b\u5433\u653f\u5fb7\u6838\u51c6\u65e5\u671f\uff1a103/04/28\u6838\u51c6\u6587\u865f\uff1a\u7c3d\u5831\u55ae\u4f4d\u4e3b\u7ba1\u6838\u51c6\u5728\u6848',
 datetime.datetime(2014, 4, 29, 0, 0),
 u'\u70cf\u6a39\u6797\u4f11\u9592\u5ee3\u5834103\u5e74\u5ea6\u52de\u52d9\u63a1\u8cfc',
 u'\u975e\u5c6c\u611b\u53f0\u5341\u4e8c\u9805\u8a08\u756b',
 u'\u6c7a\u6a19\u516c\u544a',
 6288452,
 6288452,
 u'1026604961',
 u'\u6700\u4f4e\u6a19']

In [186]:
# -*- coding: utf-8 -*-
import sqlite3
db = sqlite3.connect('tender.sqlite')
cur = db.cursor()
tr_ele =  get_response_element('gov/51099604_1026604961.txt')
return_dic =  get_award_info_dic(tr_ele)
placeholders =  ', '.join(return_dic.keys())
values =  ', '.join('?' * len(return_dic))

skeleton = 'insert into Tender_awards({}) values({});'

cur.execute(skeleton.format(placeholders, values), return_dic.values())
db.commit()
db.close()

In [187]:
import sqlite3 as lite
salary = (
 (1 , 'Paul' , 32 , 'California', 20000),
 (2 , 'Allen' , 25 , 'Texas' , 15000),
 (3 , 'Teddy' , 23 , 'Norway' , 20000),
 (4 , 'Mark' , 25 , 'Rich-Mond' , 65000),
 (5 , 'David' , 27 , 'Texas' , 85000),
 (6 , 'Kim' , 22 , 'South-Hall', 45000),
 (7 , 'James' , 24 , 'Houston' , 10000)
)
department = (
(1, 'IT Billing' ,1),
(2, 'Engineerin' ,2),
(3, 'Finance' ,7),
(4, 'Finance' ,5),
(5, 'Finance' ,6)
)

In [191]:
con = lite.connect('test.sqlite')
with con:

    cur = con.cursor()

    cur.execute("DROP TABLE IF EXISTS Salary")
    cur.execute("DROP TABLE IF EXISTS Department")

    cur.execute("CREATE TABLE Salary(ID INT, NAME TEXT, AGE INT, ADDRESS TEXT, SALARY INT)")
    cur.executemany("INSERT INTO Salary VALUES(?, ?, ?, ?, ?)", salary)


    cur.execute("CREATE TABLE Department(ID INT PRIMARY KEY, DEPT CHAR(50), EMP_ID INT)")
    cur.executemany("INSERT INTO Department VALUES(?, ?, ?)", department)
con.close()

In [194]:
dbname = 'test.sqlite'
con = lite.connect(dbname)
items = tuple(range(1,1000000))
import time
with con:
    cur = con.cursor()
    cur.execute("DROP TABLE IF EXISTS ptest")
    cur.execute("CREATE TABLE ptest(ID INT)")
    start = time.time()
    for i in range(0,1000000):
        cur.execute("INSERT INTO ptest VALUES(%s)"%(i))
    end = time.time()
    print end -start
    cur.execute("select count(1) from ptest")
    rows = cur.fetchone()
    print rows
con.close()


4.77899980545
(1000000,)

In [196]:
dbname = 'test.sqlite'
con = lite.connect(dbname)
with con:

    cur = con.cursor()
    cur.execute("DROP TABLE IF EXISTS ptest")
    cur.execute("CREATE TABLE ptest(ID INT)")
    start = time.time()
    cur.executemany("INSERT INTO ptest VALUES(?)",((id_, ) for id_ in xrange(1000000)))
    end = time.time()
    print end -start
    cur.execute("select count(1) from ptest")
    rows = cur.fetchone()
    print rows
con.close()


0.958999872208
(1000000,)

In [197]:
dbname = 'test.sqlite'
con = lite.connect(dbname)
with con:
    cur = con.cursor()
    cur.execute('select * from employee')
    data = cur.fetchall()
    for rec in data:
        print rec
    cur.close()
con.close()


(1, u'qoo', 9000000)
(2, u'oop', 9000000)
(3, u'amy', 500000)

In [199]:
dbname = 'test.sqlite'
con = lite.connect(dbname)
with con:
    cur = con.cursor()
    cur.execute('select * from employee')
    data = cur.fetchone()
    for rec in data:
        print rec
    data = cur.fetchone()
    for rec in data:
        print rec
    cur.close()
con.close()


1
qoo
9000000
2
oop
9000000

In [201]:
dbname = 'test.sqlite'
con = lite.connect(dbname)
with con:
    cur = con.cursor()
    
    
    cur.execute("select * from employee")
    data = cur.fetchall()
    for rec in data:
        print rec
        
        
    cur.execute("update employee set salary = 1000000 where name = 'qoo'")
    
    con.commit()
    
    # rollback
    con.rollback()
    
    
    cur.execute("select * from employee")
    data = cur.fetchall()
    for rec in data:
        print rec

con.close()


(1, u'qoo', 9000000)
(2, u'oop', 9000000)
(3, u'amy', 500000)
(1, u'qoo', 1000000)
(2, u'oop', 9000000)
(3, u'amy', 500000)

In [202]:
def insert_award_info (cur, data_dic, filename):
    file_param = filename.split(".txt")[0].split("_")
    pkAtmMain = file_param[0]
    data_dic['pkAtmMain'] = pkAtmMain
    columns = ', '.join(data_dic.keys())
    placeholders = ', '.join('?' * len(data_dic))
    sql = 'INSERT INTO Tender_awards ({}) VALUES ({})'.format(columns, placeholders)
    cur.execute(sql, data_dic.values())

In [205]:
db = sqlite3.connect('tender.sqlite')
cur = db.cursor()
path = "gov/"
dirs = os.listdir(path) 
print dirs


['51099604_1026604961.txt', '51122915_TS1021101.txt', '51125369_102bali-005-1.txt', '51170299_L1331021201.txt', '51236707_1010062-1.txt', '51238144_C1030415.txt', '51238769_A35014.txt', '51238854_1030307.txt', '51239201_10302.txt', '51239215_002-103-3-3-02.txt', '51239216_NCC-Y103-010.txt', '51239234_103-02-029.txt', '51239245_103A03.txt', '51239274_UCA028C009.txt', '51239281_103-04.txt', '51239284_10318.txt', '51239291_0607L1030328.txt', '51239297_FH03032P033.txt', '51239301_EB10319.txt', '51239328_1030304.txt', '51239331_W10P103017.txt', '51239347_103107.txt', '51239361_W10P103012.txt', '51239371_4197.txt', '51239373_SV-103005.txt', '51239388_BAA0384001.txt', '51239395_1030324.txt', '51239412_103036.txt', '51239414_103230230128.txt', '51239415_10328.txt', '51239416_103026.txt', '51239456_103TMACH09S.txt', '51239468_3410300075-S03023.txt', '51239486_103FC002.txt', '51239509_A103000002.txt', '51239510_1030228.txt', '51239514_Jcs-1030501.txt', '51239526_1030401.txt', '51239533_103-03-080.txt', '51239537_NPM103037.txt', '51239550_D103002.txt', '51239554_FD03007P005.txt', '51239564_1030401.txt', '51239566_103-0052616.txt', '51239597_HJ03242P121.txt', '51239606_1030206.txt', '51239609_102107-1.txt', '51239619_103C2022.txt', '51239621_1030201.txt', '51239647_103A119.txt', '51239650_102108-1.txt', '51239654_B103167.txt', '51239662_B103169.txt', '51239663_1030325-1.txt', '51239664_M03I6247.txt', '51239667_B103162.txt', '51239675_103203.txt', '51239676_tms103064.txt', '51239680_01-10304086.txt', '51239683_1030326AA.txt', '51239688_S102026-1.txt', '51239692_103009.txt', '51239704_103016.txt', '51239708_B103165.txt', '51239709_103-03-27.txt', '51239717_XU03E29P092.txt', '51239723_1030055.txt', '51239725_TC103C023.txt', '51239727_1030247.txt', '51239734_103008.txt', '51239741_103A021.txt', '51239750_IOT-103-H3D010.txt', '51239764_103042111.txt', '51239767_103-5.txt', '51239796_M1030062.txt', '51239827_103-0311-1-007.txt', '51239829_C10300519.txt', '51239837_10320.txt', '51239839_10303A.txt', '51239844_F103-44.txt', '51239853_10307.txt', '51239871_1030418.txt', '51239873_103-0311-1-009.txt', '51239876_ndc103008.txt', '51239883_1021011C.txt', '51239884_103005.txt', '51239903_C103-07.txt', '51239907_103B0308.txt', '51239913_1180300003.txt', '51239914_HST10312.txt', '51239925_2014041603.txt', '51239930_1030428.txt', '51239931_2014031704.txt', '51239933_103018.txt', '51239934_1030314-01.txt', '51239937_Q10306.txt', '51239940_103B0403.txt', '51239943_NCCT103005.txt', '51239946_10308.txt', '51239958_117103006-1.txt', '51239966_52R-140003.txt', '51239970_103004.txt', '51239986_103310.txt', '51239995_NL1030479.txt', '51240003_103311.txt', '51240004_AAC1033001.txt', '51240008_c1030023.txt', '51240009_10302211.txt', '51240016_103traffic012.txt', '51240019_103W009.txt', '51240020_103B0311.txt', '51240050_103A17.txt', '51240054_D103-0219-1.txt', '51240072_B961.txt', '51240090_1030321A.txt', '51240129_H103-006.txt', '51240131_1030331-1.txt', '51240132_103-21-23.txt', '51240142_103-21-20.txt', '51240146_103TFDA-N-001.txt', '51240202_103430930099.txt', '51240240_103310230121.txt', '51240279_103M602.txt', '51240484_103A011A.txt', '51240536_10310.txt', '51240543_DPD2014041102.txt', '51240557_NKUHT1524-10310.txt', '51240570_103B018.txt', '51240622_DPD2014041101.txt', '51240623_TC103C036.txt', '51240655_AASC-103085.txt', '51240712_103B04.txt', '51240739_G103EDU043.txt', '51240824_1030203.txt', '51240875_B102RD03012.txt', '51240942_SL-103046.txt', '51241029_1030011069.txt', '51241062_SL-103050.txt', '51241063_103075.txt', '51241099_103-0021002-029-2-6.txt', '51241103_P103001.txt', '51241108_11-1030328.txt', '51241115_1030404.txt', '51241135_103-TY-142.txt', '51241289_103-7.txt', '51241301_103003.txt', '51241358_Y103016.txt', '51241458_10302.txt', '51241485_103230330119.txt', '51241496_W103ABC071.txt', '51241498_103-2-010.txt', '51241557_NG03035P009P.txt', '51241573_SL-103043.txt', '51241602_103-19.txt', '51241621_MPB10390429.txt', '51241665_B1030422.txt', '51241666_B1030411.txt', '51241674_XW03072P119.txt', '51241675_XW03118P099.txt']

In [206]:
db = sqlite3.connect('tender.sqlite')
cur = db.cursor()
path = "gov/"
dirs = os.listdir(path) 
for filename in dirs:
    ele = get_response_element(path + filename)
    award_info_dic = get_award_info_dic(ele)    
    insert_award_info(cur, award_info_dic, filename)
db.commit()
db.close()

In [ ]: