File IO

Let's now look at reading and writing files. This part is simple; the patterns involved are quite fixed.


In [ ]:
# Open a file
fh = open('file.txt','w')

# Here 'w' means write, 'r' means read, and 'a' means append.
# 't' means text mode (the default, as opposed to binary mode 'b'); in text mode Python translates newlines for you.
# Unix-like platforms use \n as the line separator, while Windows uses the two ASCII characters \r\n; internally Python represents newlines as \n.
# In 'rt' mode, Python converts \r\n to \n when reading text.
# In 'wt' mode, Python translates \n to the platform's line separator when writing (\r\n on Windows).
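
To see the newline translation in action, here is a small sketch (it creates a scratch file named newline_demo.txt, a name made up for this example): write Windows-style line endings in binary mode, then read the file back in text mode.

In [ ]:
# Binary mode writes the bytes exactly as given
with open('newline_demo.txt', 'wb') as f:
    f.write(b'line1\r\nline2\r\n')

# Text mode ('rt') translates \r\n back to \n while reading
with open('newline_demo.txt', 'rt') as f:
    print(repr(f.read()))  # 'line1\nline2\n'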

In [24]:
dir(fh)


Out[24]:
['_CHUNK_SIZE',
 '__class__',
 '__del__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__enter__',
 '__eq__',
 '__exit__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__ne__',
 '__new__',
 '__next__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '_checkClosed',
 '_checkReadable',
 '_checkSeekable',
 '_checkWritable',
 '_finalizing',
 'buffer',
 'close',
 'closed',
 'detach',
 'encoding',
 'errors',
 'fileno',
 'flush',
 'isatty',
 'line_buffering',
 'mode',
 'name',
 'newlines',
 'read',
 'readable',
 'readline',
 'readlines',
 'seek',
 'seekable',
 'tell',
 'truncate',
 'writable',
 'write',
 'writelines']

In [80]:
fh.name


Out[80]:
'file.txt'

In [81]:
fh.mode


Out[81]:
'w'

In [82]:
fh.encoding


Out[82]:
'UTF-8'

In [83]:
fh.writable()


Out[83]:
True

In [84]:
fh.write("life is so great,yes we can")


Out[84]:
27

In [85]:
fh.flush()

In [86]:
fh.close()

In [88]:
f = open('file.txt','r')

In [35]:
f.readlines()


Out[35]:
['life is so greatlife is so great,yes we can']

In [39]:
f.close()

In [95]:
ff = open('file.txt','a',encoding = 'latin-1')

In [96]:
ff.write('aaaaa')


Out[96]:
5

In [97]:
ff.close()

Note: when writing code that reads or writes files, always open the file with a with statement. That way you don't have to call close() explicitly, and the file is managed within a temporary context.


In [56]:
with open('file.txt','w') as ff:
    ff.write('aaaaa')

x = 'a'

The os module


In [9]:
import os
cur_dir = os.getcwd()

In [10]:
for file in os.listdir():
    print(file)


.DS_Store
.ipynb_checkpoints
add22.py
bench.sh
data
file.txt
podcasts.xml
potholes.xml
somefile
testdata.csv
testout.csv
Untitled.ipynb
week1st.ipynb
week2nd.ipynb
week3rd_ooDesign.ipynb

In [21]:
os.path.isfile('file.txt')


Out[21]:
True

In [22]:
os.path.exists('file.txt')


Out[22]:
True

In [23]:
os.path.isdir('somefile')


Out[23]:
False

The os module has many different functions that are very practical; you can use them to simplify a lot of file-related operations.


In [14]:
for dir_name, sub_dirs, files in os.walk(cur_dir):
    print(dir_name,'\n\t', sub_dirs,'\n\t\t', files)


/Users/hanlei/Documents/python/note/training 
	 ['.ipynb_checkpoints', 'data'] 
		 ['.DS_Store', 'add22.py', 'bench.sh', 'file.txt', 'podcasts.xml', 'potholes.xml', 'somefile', 'testdata.csv', 'testout.csv', 'Untitled.ipynb', 'week1st.ipynb', 'week2nd.ipynb', 'week3rd_ooDesign.ipynb']
/Users/hanlei/Documents/python/note/training/.ipynb_checkpoints 
	 [] 
		 ['Untitled-checkpoint.ipynb', 'week1st-checkpoint.ipynb', 'week2nd-checkpoint.ipynb', 'week3rd-checkpoint.ipynb']
/Users/hanlei/Documents/python/note/training/data 
	 [] 
		 ['person.xml']

Scanning a directory


In [15]:
for entry in os.scandir(cur_dir):
    if entry.is_dir():
        typ = 'dir'
    elif entry.is_file():
        typ = 'file'
    elif entry.is_symlink():
        typ = 'link'
    else:
        typ = 'unknown'
    print('{name} {typ}'.format(
        name=entry.name,
        typ=typ,
    ))


.DS_Store file
.ipynb_checkpoints dir
add22.py file
bench.sh file
data dir
file.txt file
podcasts.xml file
potholes.xml file
somefile file
testdata.csv file
testout.csv file
Untitled.ipynb file
week1st.ipynb file
week2nd.ipynb file
week3rd_ooDesign.ipynb file

Changing the working directory


In [ ]:
import os

print('Starting:', os.getcwd())

print('Moving up one:', os.pardir)
os.chdir(os.pardir)

print('After move:', os.getcwd())

Building paths


In [ ]:
import os.path

PATHS = [
    ('one', 'two', 'three'),
    ('/', 'one', 'two', 'three'),
    ('/one', '/two', '/three'),
]

for parts in PATHS:
    print('{} : {!r}'.format(parts, os.path.join(*parts)))

Splitting paths


In [16]:
os.path.split(os.getcwd())


Out[16]:
('/Users/hanlei/Documents/python/note', 'training')

Creating and removing directories


In [19]:
import os

dir_name = 'os_directories_example'

print('Creating', dir_name)
os.makedirs(dir_name)

file_name = os.path.join(dir_name, 'example.txt')
print('Creating', file_name)
with open(file_name, 'wt') as f:
    f.write('example file')

print('Cleaning up')
os.unlink(file_name)
os.rmdir(dir_name)


Creating os_directories_example
Creating os_directories_example/example.txt
Cleaning up

csv/json/xml


In [44]:
import csv
import json
import lxml

In [50]:
with open('testdata.csv', 'rt') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)


['Title 1', 'Title 2', 'Title 3', 'Title 4']
['1', 'a', '08/18/07', 'Ã¥']
['2', 'b', '08/19/07', 'â«']
['3', 'c', '08/20/07', 'ç']
[]

In [54]:
unicode_chars = 'å∫ç'

with open('testout.csv', 'wt') as f:
    writer = csv.writer(f)
    writer.writerow(('Title 1', 'Title 2', 'Title 3', 'Title 4'))
    for i in range(3):
        row = (
            i + 1,
            chr(ord('a') + i),
            '08/{:02d}/07'.format(i + 1),
            unicode_chars[i],
        )
        writer.writerow(row)

In [98]:
with open('testdata.csv', 'rt') as f:
    reader = csv.DictReader(f)
    for row in reader:
        print(row)


OrderedDict([('Title 1', '1'), ('Title 2', 'a'), ('Title 3', '08/18/07'), ('Title 4', 'Ã¥')])
OrderedDict([('Title 1', '2'), ('Title 2', 'b'), ('Title 3', '08/19/07'), ('Title 4', 'â«')])
OrderedDict([('Title 1', '3'), ('Title 2', 'c'), ('Title 3', '08/20/07'), ('Title 4', 'ç')])

In [47]:
import json

data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
print('DATA:', repr(data))

data_string = json.dumps(data)
print('JSON:', data_string)


DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
JSON: [{"a": "A", "b": [2, 4], "c": 3.0}]

In [100]:
import json

data = [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
print('DATA   :', data)

data_string = json.dumps(data)
print('ENCODED:', data_string)

decoded = json.loads(data_string)
print('DECODED:', decoded)

print('ORIGINAL:', type(data[0]['b']))
print('DECODED :', type(decoded[0]['b']))


DATA   : [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
ENCODED: [{"a": "A", "b": [2, 4], "c": 3.0}]
DECODED: [{'a': 'A', 'b': [2, 4], 'c': 3.0}]
ORIGINAL: <class 'tuple'>
DECODED : <class 'list'>

CSV and JSON handling is straightforward: both csv and json are part of the Python standard library, and you mostly rely on the same few fixed methods.


In [108]:
import json

data = [{'a': 'A', 'b': (2, 4), 'c': 3.0},{'a': 'A', 'b': (2, 4), 'c': 3.0},{'a': 'A', 'b': (2, 4), 'c': 3.0}]
print('DATA:', data)

# print('NORMAL:', json.dumps(data, sort_keys=True))
# print('INDENT:', json.dumps(data, sort_keys=True, indent=2))

import pprint
pp = pprint.PrettyPrinter(indent=4)
pp.pprint(data)


DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}, {'a': 'A', 'b': (2, 4), 'c': 3.0}, {'a': 'A', 'b': (2, 4), 'c': 3.0}]
[   {'a': 'A', 'b': (2, 4), 'c': 3.0},
    {'a': 'A', 'b': (2, 4), 'c': 3.0},
    {'a': 'A', 'b': (2, 4), 'c': 3.0}]

xml

First, you need to know the general structure of an XML document, which is easy to pick up from the many resources available online. You can then extract data from XML files with the standard-library xml.etree.ElementTree (used in the examples below) or the third-party lxml library. Generally speaking, XML is not that common in Python; for configuration files, JSON/INI/YAML are usually enough.


In [123]:
from urllib.request import urlopen
from xml.etree.ElementTree import parse

# Download the RSS feed and parse it
u = urlopen('http://planet.python.org/rss20.xml')
doc = parse(u)

# Extract and output tags of interest
for item in doc.iterfind('channel/item'):
    title = item.findtext('title')
    date = item.findtext('pubDate')
    link = item.findtext('link')

    print(title)
    print(date)
    print(link)
    print()


Patrick Kennedy: Using Docker for Flask Application Development (not just Production!)
Fri, 23 Jun 2017 15:31:24 +0000
http://www.patricksoftwareblog.com/using-docker-for-flask-application-development-not-just-production/

Django Weekly: Django Weekly Issue 44 - Django vs Flask, Translation, Kubernetes, Google Authentication and more
Fri, 23 Jun 2017 12:50:56 +0000
http://djangoweekly.com/blog/post/django-weekly-issue-44-django-vs-flask-translation-kubernetes-google-authentication-and-more

EuroPython: PyData EuroPython 2017
Fri, 23 Jun 2017 10:29:53 +0000
http://blog.europython.eu/post/162158264347

Fabio Zadrozny: mu-repo: Dealing with multiple git repositories
Fri, 23 Jun 2017 09:07:20 +0000
http://feedproxy.google.com/~r/blogspot/pydev/~3/UeW5aEJ_7VE/mu-repo-dealing-with-multiple-git.html

Brad Lucas: Python Virtualenv
Fri, 23 Jun 2017 04:00:00 +0000
http://blog.bradlucas.com/posts/2017-06-23-python-virtualenv/

Hynek Schlawack: Sharing Your Labor of Love: PyPI Quick and Dirty
Fri, 23 Jun 2017 00:00:00 +0000
https://hynek.me/articles/sharing-your-labor-of-love-pypi-quick-and-dirty/

Tarek Ziade: Advanced Molotov example
Thu, 22 Jun 2017 22:00:00 +0000
https://ziade.org/2017/06/23/advanced-molotov-example/

Stephen Ferg: Unicode for dummies &#8212; Encoding
Thu, 22 Jun 2017 21:27:07 +0000
https://pythonconquerstheuniverse.wordpress.com/2012/02/01/unicode-for-dummies-encoding/

Philip Semanchuk: Analyzing the Anglo-Saxonicity of the Baby BNC
Thu, 22 Jun 2017 18:46:13 +0000
http://blog.pyspoken.com/2017/06/22/analyzing-the-anglo-saxonicity-of-the-baby-bnc/

Django Weblog: DjangoCon US Schedule Is Live
Thu, 22 Jun 2017 17:46:57 +0000
https://www.djangoproject.com/weblog/2017/jun/22/djangocon-us-schedule-live/

Mike Driscoll: Book Review: Software Architecture with Python
Thu, 22 Jun 2017 17:15:51 +0000
http://www.blog.pythonlibrary.org/2017/06/22/book-review-software-architecture-with-python/

EuroPython: EuroPython 2017: Call for on-site volunteers
Thu, 22 Jun 2017 15:29:28 +0000
http://blog.europython.eu/post/162126235297

PyCharm: PyCharm Edu 4 EAP: Integration with Stepik for Educators
Thu, 22 Jun 2017 14:52:36 +0000
http://feedproxy.google.com/~r/Pycharm/~3/-pCSrEA9er0/

Python Anywhere: The PythonAnywhere API:  beta now available for all users
Thu, 22 Jun 2017 12:12:36 +0000
http://blog.pythonanywhere.com/154/

A. Jesse Jiryu Davis: New Driver Features for MongoDB 3.6
Thu, 22 Jun 2017 08:12:59 +0000
https://emptysqua.re/blog/driver-features-for-mongodb-3-6/

eGenix.com: Python Meeting Düsseldorf - 2017-06-28
Thu, 22 Jun 2017 08:00:00 +0000
http://www.egenix.com/company/news/Python-Meeting-Duesseldorf-2017-06-28

Continuum Analytics News: It’s Getting Hot, Hot, Hot: Four Industries Turning Up The Data Science Heat
Wed, 21 Jun 2017 17:56:34 +0000
https://www.continuum.io/blog/company-blog/its-getting-hot-hot-hot-four-industries-turning-data-science-heat

PyCharm: PyCharm 2017.2 EAP 4
Wed, 21 Jun 2017 17:00:20 +0000
http://feedproxy.google.com/~r/Pycharm/~3/yOn5wW6X9z4/

Enthought: Enthought Announces Canopy 2.1: A Major Milestone Release for the Python Analysis Environment and Package Distribution
Wed, 21 Jun 2017 16:30:55 +0000
http://blog.enthought.com/enthought-canopy/enthought-announces-canopy-2-1-a-major-milestone-release-for-the-python-analysis-environment-and-package-distribution/

DataCamp: New Course: Deep Learning in Python (first Keras 2.0 online course!)
Wed, 21 Jun 2017 14:10:33 +0000
http://www.datacamp.com/community/blog/new-course-deep-learning-in-python-first-keras-2-0-online-course

EuroPython: EuroPython 2017: Conference App available
Wed, 21 Jun 2017 11:56:34 +0000
http://blog.europython.eu/post/162082203552

Kushal Das: Updates on my Python community work: 16-17
Wed, 21 Jun 2017 10:56:00 +0000
https://kushaldas.in/posts/updates-on-my-python-community-work-16-17.html

Codementor: Building an Hello World Application with Python/Django
Wed, 21 Jun 2017 10:11:00 +0000
https://www.codementor.io/abiodunhassan/building-an-hello-world-application-with-python-django-95sysyr6v

Python Bytes: #31 You should have a change log
Wed, 21 Jun 2017 08:00:00 +0000
https://pythonbytes.fm/episodes/show/31/you-should-have-a-change-log

Talk Python to Me: #117 Functional Python with Coconut
Wed, 21 Jun 2017 08:00:00 +0000
https://talkpython.fm/episodes/show/117/functional-python-with-coconut

The xml.etree.ElementTree.parse() function parses the whole XML document into a document object. You can then use methods such as find(), iterfind(), and findtext() to search for specific XML elements. The arguments to these functions are tag names, for example channel/item or title.

Every time you specify a tag, you traverse the document structure. Each search operation starts from a starting element, and the tag name you pass is a path relative to that starting element. For example, doc.iterfind('channel/item') searches for all item elements under channel elements, where doc represents the top of the document (the top-level rss element). The subsequent call item.findtext() then searches relative to the item element that was found.

Each element in the ElementTree module has a few important attributes and methods that are very useful when parsing: the tag attribute contains the tag's name, the text attribute contains the inner text, and the get() method retrieves attribute values.
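
As a quick illustration of tag, text, and get(), here is a minimal sketch using an inline XML string (the data is made up for this example):

In [ ]:
from xml.etree import ElementTree

root = ElementTree.fromstring('<channel><item id="1">hello</item></channel>')
item = root.find('item')
print(item.tag)        # 'item'  -- the tag name
print(item.text)       # 'hello' -- the inner text
print(item.get('id'))  # '1'     -- an attribute value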


In [116]:
from xml.etree import ElementTree

with open('podcasts.xml', 'rt') as f:
    tree = ElementTree.parse(f)

print(type(tree))


<class 'xml.etree.ElementTree.ElementTree'>

In [118]:
from xml.etree import ElementTree
import pprint

with open('podcasts.xml', 'rt') as f:
    tree = ElementTree.parse(f)

for node in tree.iter():
    print(node.tag)


opml
head
title
dateCreated
dateModified
body
outline
outline
outline
outline
outline

In [55]:
from xml.etree import ElementTree

with open('podcasts.xml', 'rt') as f:
    tree = ElementTree.parse(f)

for node in tree.iter('outline'):
    name = node.attrib.get('text')
    url = node.attrib.get('xmlUrl')
    if name and url:
        print('  %s' % name)
        print('    %s' % url)
    else:
        print(name)


Non-tech
  99% Invisible
    http://feeds.99percentinvisible.org/99percentinvisible
Python
  Talk Python to Me
    https://talkpython.fm/episodes/rss
  Podcast.__init__
    http://podcastinit.podbean.com/feed/

In [121]:
from xml.etree import ElementTree

with open('podcasts.xml', 'rt') as f:
    tree = ElementTree.parse(f)

for node in tree.findall('.//outline'):
    url = node.attrib.get('xmlUrl')
    if url:
        print(url)


http://feeds.99percentinvisible.org/99percentinvisible
https://talkpython.fm/episodes/rss
http://podcastinit.podbean.com/feed/

In [122]:
from xml.etree import ElementTree

with open('podcasts.xml', 'rt') as f:
    tree = ElementTree.parse(f)

for node in tree.findall('.//outline/outline'):
    url = node.attrib.get('xmlUrl')
    print(url)


http://feeds.99percentinvisible.org/99percentinvisible
https://talkpython.fm/episodes/rss
http://podcastinit.podbean.com/feed/

There are a few configuration formats we haven't covered, such as YAML and INI. YAML can be thought of as a more human-readable alternative to JSON; just pay attention to its formatting rules, especially after editing a file by hand (keep the indentation aligned, and conventionally end the file with a newline). The PyYAML library handles it directly.
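
A minimal sketch of the PyYAML workflow (it assumes a hypothetical config.yaml file and requires the third-party package: pip install pyyaml):

In [ ]:
import yaml  # third-party: PyYAML

# Reading: safe_load returns plain dicts/lists/strings, much like json.load
with open('config.yaml', 'rt') as f:
    cfg = yaml.safe_load(f)
print(cfg)

# Writing is symmetric
with open('config_out.yaml', 'wt') as f:
    yaml.safe_dump(cfg, f, default_flow_style=False)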

For INI files, the standard-library configparser is all you need, but be careful to follow the file format strictly; some of its quirks can seem baffling. For example, if you embed a dictionary literal as a value, the first element must come right after the opening brace, and there must be a space before the matching closing brace.
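
A minimal configparser sketch (assuming a hypothetical settings.ini with a [server] section containing host and port keys):

In [ ]:
import configparser

config = configparser.ConfigParser()
config.read('settings.ini')

host = config['server']['host']          # values always come back as strings
port = config.getint('server', 'port')   # typed accessors: getint, getfloat, getboolean
print(host, port)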

pickle

For data persistence, pickle is one of the available solutions. It is a standard-library module and its usage is simple and very similar to json: loads and dumps (plus load and dump for file objects).


In [2]:
# Encoding and decoding data to and from a byte string

import pickle
import pprint  # also a standard-library module, used to display data structures more readably

data = [{'a': 'A', 'b': 2, 'c': 3.0}]
print('DATA:', end=' ')
pprint.pprint(data)

data_string = pickle.dumps(data)
print('PICKLE: {!r}'.format(data_string))


DATA: [{'a': 'A', 'b': 2, 'c': 3.0}]
PICKLE: b'\x80\x03]q\x00}q\x01(X\x01\x00\x00\x00aq\x02X\x01\x00\x00\x00Aq\x03X\x01\x00\x00\x00bq\x04K\x02X\x01\x00\x00\x00cq\x05G@\x08\x00\x00\x00\x00\x00\x00ua.'

In [3]:
import pickle
import pprint

data1 = [{'a': 'A', 'b': 2, 'c': 3.0}]
print('BEFORE: ', end=' ')
pprint.pprint(data1)

data1_string = pickle.dumps(data1)

data2 = pickle.loads(data1_string)
print('AFTER : ', end=' ')
pprint.pprint(data2)

print('SAME? :', (data1 is data2))
print('EQUAL?:', (data1 == data2))


BEFORE:  [{'a': 'A', 'b': 2, 'c': 3.0}]
AFTER :  [{'a': 'A', 'b': 2, 'c': 3.0}]
SAME? : False
EQUAL?: True
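
dump and load work on file objects, just like json.dump/json.load. Here is a minimal sketch (the file name data.pickle is made up for this example; note the binary modes 'wb'/'rb'):

In [ ]:
import pickle

data = [{'a': 'A', 'b': 2, 'c': 3.0}]

with open('data.pickle', 'wb') as f:
    pickle.dump(data, f)          # serialize straight into the file

with open('data.pickle', 'rb') as f:
    restored = pickle.load(f)     # read it back

print(restored == data)           # True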


Functions

We have actually already seen many examples involving functions, but there are still a few aspects of argument passing in Python that we haven't covered:


In [6]:
def avg(first, *rest):
    print("first is {}".format(first))
    print("rest are {}".format(rest))
    return (first + sum(rest)) / (1 + len(rest))

# Sample use
print(avg(2, 2)) # 2.0
print(avg(4))    # 4.0
avg(5, 2, 3, 4)  # 3.5


first is 2
rest are (2,)
2.0
first is 4
rest are ()
4.0
first is 5
rest are (2, 3, 4)
Out[6]:
3.5

In [14]:
import html

def make_element(name, value, **attrs):
    keyvals = [' %s="%s"' % item for item in attrs.items()]
    print(keyvals)
    attr_str = ''.join(keyvals)
    print(attr_str)
    element = '<{name}{attrs}>{value}</{name}>'.format(
                name=name,
                attrs=attr_str,
                value=html.escape(value))
    return element

# Example
# Creates '<item size="large" quantity="6">Albatross</item>'
me = make_element('item', 'Albatross>>&<', size='large', quantity=6)
print(me)
# Creates '<p>&lt;spam&gt;</p>'
make_element('p', '<spam>')


[' size="large"', ' quantity="6"']
 size="large" quantity="6"
<item size="large" quantity="6">Albatross&gt;&gt;&amp;&lt;</item>
[]

Out[14]:
'<p>&lt;spam&gt;</p>'

In [22]:
def mininum(*values, clip=None):
    m = min(values)
    if clip is not None:
        m = clip if clip > m else m
    return m

print(mininum(1, 5, 2, -5, 10)) # Returns -5
print(mininum(1,5, 2,-5,10, clip=20))
print(mininum(1, 5, 2, -5, 10, clip=0)) # Returns 0


-5
20
0

In [ ]:
def mininum(clip, num, value=None, *args, **kwargs):
    pass

In [30]:
def mininum(*values, clip=None, **num):
    m = min(values)
    if clip is not None:
        m = clip if clip > m else m
    return m

print(mininum(1, 5, 2, -5, 10))


-5

In [28]:
def mininum(*values, clip=None, **num):
    m = min(values)
    if clip is not None:
        m = clip if clip > m else m
    m = num
    return m

print(mininum(1, 5, 2, -5, 10, clip = 20, res = 7))


{'res': 7}

In [37]:
def my_func():
    return 1,2,3

a,b,c = my_func()
m = my_func()
m, _, n  = my_func()
print(a,b,c)
print(m)
print(m,n)


1 2 3
1
1 3

In [38]:
def spam(a, b=51):
    print(a, b)

spam(1) # Ok. a=1, b=51
spam(1, 2) # Ok. a=1, b=2


1 51
1 2

In [41]:
add = lambda x, y: x + y
add(3,4)


Out[41]:
7

In [42]:
add('a','b')


Out[42]:
'ab'

In [43]:
def spam(a, b, c, d):
    print(a, b, c, d)

In [69]:
from functools import partial
s1 = partial(spam, 1) # a = 1
s1(2, 3, 4)


1 2 3 4

In programming-language terms, partial is related to currying (partial application) from functional programming. The idea is simple: you fix some of a function's arguments to values that won't change, and derive a new function from the original one that only takes the remaining arguments.


In [ ]:
# Roughly what partial(spam, 1) gives you: 'a' is already bound to 1
def s1(b, c, d):
    return spam(1, b, c, d)

In [71]:
s2 = partial(spam, 2,3)
s2(4,5)


2 3 4 5
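
partial can pre-bind keyword arguments as well as positional ones; here is a small illustrative sketch (parse_binary is just a name made up for this example):

In [ ]:
from functools import partial

parse_binary = partial(int, base=2)   # int(x, base=2) parses binary strings
print(parse_binary('1011'))           # 11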

In [46]:
arr = [1,2,3,4,5]
m = map((lambda x: x + 3), arr)

map can be thought of as a transform: it walks through every element of the iterable, applies the given function to each one, and yields the converted values one by one. In Python 3 it returns a lazy iterator rather than a list.
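
A quick check of this laziness (arr is redefined here so the sketch is self-contained): map() gives back a map object, and wrapping it in list() materializes the results.

In [ ]:
arr = [1, 2, 3, 4, 5]
m = map(lambda x: x + 3, arr)
print(m)          # <map object at 0x...>, nothing computed yet
print(list(m))    # [4, 5, 6, 7, 8]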


In [48]:
list(map(pow,[1,2,3],[2,3,4]))


Out[48]:
[1, 8, 81]

In [51]:
lt = [i for i in range(-5, 10, 2)]

In [52]:
list(filter((lambda x: x > 0), lt))


Out[52]:
[1, 3, 5, 7, 9]

filter can be thought of as filtering, a bit like grep in the shell: it walks through each element, keeps the ones for which the function returns a truthy value, and yields them one by one.


In [53]:
from functools import reduce
reduce((lambda x, y: x+y), [1,2,3,4])


Out[53]:
10

reduce can be thought of as folding: it collapses a sequence into a single value by repeatedly applying a two-argument function. Addition and multiplication are just the most common choices; any two-argument function works.
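
To show that reduce is not limited to + and *, here is a small sketch folding with other two-argument functions:

In [ ]:
from functools import reduce

# Folding with a "keep the larger" function gives the maximum
print(reduce(lambda x, y: x if x > y else y, [3, 8, 2, 5]))        # 8

# A third argument supplies the initial accumulator value
print(reduce(lambda acc, s: acc + len(s), ['ab', 'cde', 'f'], 0))  # 6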


In [54]:
reduce((lambda x, y: x * y), [2,3,4])


Out[54]:
24

In [124]:
[i for i in range(10) if i % 2]


Out[124]:
[1, 3, 5, 7, 9]

In [125]:
for i in range(10):
    if i % 2:
        print(i)


1
3
5
7
9

Iterators and generators

Iteration is fundamental to data processing. When scanning datasets too large to fit in memory, we need a way to fetch items lazily, that is, one at a time, on demand. This is the Iterator pattern.

In Python, every collection is iterable. Internally, iterators are used to support:

  • for loops
  • building and extending collection types
  • looping over text files line by line
  • list, dict, and set comprehensions
  • tuple unpacking
  • unpacking actual parameters with * in function calls

If you want to walk through all the elements of an iterable, you don't have to use a for/while loop:


In [ ]:
arr = [1,2,3,4]
a = iter(arr)
next(a)

In [ ]:
next(a)

In [ ]:
a

In [ ]:
next(a)

In [ ]:
next(a)

In [ ]:
next(a)

In [43]:
def manual_iter():
    with open('testdata.csv') as f:
        try:
            while True:
                line = next(f)
                print(line, end='')
        except StopIteration:
            pass
manual_iter()


"Title 1","Title 2","Title 3","Title 4"
1,"a",08/18/07,"Ã¥"
2,"b",08/19/07,"â«"
3,"c",08/20/07,"ç"


In [48]:
with open('testdata.csv') as f:
    while True:
        line = next(f, None)
        if line is None:
            break
        print(line, end='')


"Title 1","Title 2","Title 3","Title 4"
1,"a",08/18/07,"Ã¥"
2,"b",08/19/07,"â«"
3,"c",08/20/07,"ç"


In [46]:
with open('testdata.csv') as f:
    for line in iter(f):        
        print(line, end = '')


"Title 1","Title 2","Title 3","Title 4"
1,"a",08/18/07,"Ã¥"
2,"b",08/19/07,"â«"
3,"c",08/20/07,"ç"

A generator function creates a generator object that wraps the function's body. When the generator is passed to next(...), the generator function advances to the next yield statement in the body, returns the value produced, and pauses at that point in the body.

Eventually, when the function body returns, the enclosing generator object raises StopIteration, consistent with the iterator protocol.
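
A minimal sketch of this behaviour, stepping through a small generator by hand with next():

In [ ]:
def gen_123():
    yield 1
    yield 2
    yield 3

g = gen_123()
print(next(g))   # 1
print(next(g))   # 2
print(next(g))   # 3
# next(g)        # one more call would raise StopIteration, which a for loop handles for us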



How generator functions work

As long as the body of a Python function contains the yield keyword, that function is a generator function. Calling a generator function returns a generator object; in other words, a generator function is a generator factory. The only syntactic difference between an ordinary function and a generator function is that the latter has yield in its body.

Generators are built with yield, and a generator fully implements the iterator interface:


In [ ]:
def frange(start, stop, increment):
    x = start
    while x < stop:
        yield x
        x += increment

The yield here works like this: execution pauses at that point, the current state is saved, and the value to the right of yield is returned; the statements after it only run when the next iteration is triggered.

In [ ]:
list(frange(0, 1, 0.125))

In [ ]:
def gen_ab():
    print('start')
    yield 'a'
    print('continue')
    yield 'b'
    print('end')

for i in gen_ab():
    print('--->'+i)

A key characteristic of a generator function is that it only responds to the next operations used during iteration. Once the generator function returns and exits, the iteration ends. The for statement we normally use for iteration handles these details automatically.

Flattening a multiply nested sequence into a single flat list:


In [ ]:
from collections.abc import Iterable  # import from collections.abc; the plain collections import was removed in Python 3.10

def flatten(items, ignore_types=(str, bytes)):
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            yield from flatten(x)
        else:
            yield x

items = [1, 2, [3, 4, [5, 6], 7], 8]
list(flatten(items))

The yield from statement is very useful when you want to call other generators as subroutines inside a generator. Without it, you would have to write an extra for loop, for example:


In [ ]:
def flatten(items, ignore_types=(str, bytes)):
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, ignore_types):
            for i in flatten(x):
                yield i
        else:
            yield x

items = [1, 2, [3, 4, [5, 6], 7], 8]
list(flatten(items))

The itertools module

The itertools module contains many ready-made iterator functions that save you a lot of thinking; just grab them and use them directly.
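
Besides the combinatoric functions shown below, a couple of other commonly used helpers (a small illustrative sketch): chain() runs through several iterables as if they were one, count() is an infinite counter, and islice() slices any iterator lazily.

In [ ]:
import itertools

print(list(itertools.chain('ab', [1, 2, 3])))          # ['a', 'b', 1, 2, 3]
print(list(itertools.islice(itertools.count(10), 5)))  # [10, 11, 12, 13, 14]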

In [35]:
s = 'abc'

In [36]:
import itertools
for p in itertools.permutations(s):  # all possible orderings
    print(p)


('a', 'b', 'c')
('a', 'c', 'b')
('b', 'a', 'c')
('b', 'c', 'a')
('c', 'a', 'b')
('c', 'b', 'a')

In [39]:
items = ['a','b','c','d','e']
for q in itertools.combinations(items,2):
    print(q)


('a', 'b')
('a', 'c')
('a', 'd')
('a', 'e')
('b', 'c')
('b', 'd')
('b', 'e')
('c', 'd')
('c', 'e')
('d', 'e')

In [40]:
for m in itertools.combinations_with_replacement(items, 3):
    print(m)


('a', 'a', 'a')
('a', 'a', 'b')
('a', 'a', 'c')
('a', 'a', 'd')
('a', 'a', 'e')
('a', 'b', 'b')
('a', 'b', 'c')
('a', 'b', 'd')
('a', 'b', 'e')
('a', 'c', 'c')
('a', 'c', 'd')
('a', 'c', 'e')
('a', 'd', 'd')
('a', 'd', 'e')
('a', 'e', 'e')
('b', 'b', 'b')
('b', 'b', 'c')
('b', 'b', 'd')
('b', 'b', 'e')
('b', 'c', 'c')
('b', 'c', 'd')
('b', 'c', 'e')
('b', 'd', 'd')
('b', 'd', 'e')
('b', 'e', 'e')
('c', 'c', 'c')
('c', 'c', 'd')
('c', 'c', 'e')
('c', 'd', 'd')
('c', 'd', 'e')
('c', 'e', 'e')
('d', 'd', 'd')
('d', 'd', 'e')
('d', 'e', 'e')
('e', 'e', 'e')

Iterating over multiple sequences at the same time


In [41]:
a = ['a','b','c','d','e']
b = [1,2,3,4,5]

In [42]:
for k, v in zip(a, b):
    print(k, v)


a 1
b 2
c 3
d 4
e 5
