In [4]:
from operator import itemgetter

# Sample records: every row is a dict with the same three fields.
rows = [
    dict(fname='Brian', lname='Jones', uid=1003),
    dict(fname='David', lname='Beazley', uid=1002),
    dict(fname='John', lname='Cleese', uid=1001),
    dict(fname='Big', lname='Jones', uid=1004),
]

# Order the rows by an arbitrary dict field: itemgetter(<field>) builds the
# key callable that sorted() applies to each row.
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))

print(rows_by_fname, '\n')
print(rows_by_uid)
In [5]:
# With several field names itemgetter returns a tuple, so the rows are
# ordered by last name first and ties are broken by first name.
rows_by_lfname = sorted(rows, key=itemgetter('lname', 'fname'))
print(rows_by_lfname)
In [7]:
# The same orderings written with lambda keys instead of operator.itemgetter;
# in both forms the work is done by the ``key`` argument of sorted().
rows_by_fname = sorted(rows, key=lambda r: r['fname'])
# Last name is the primary key and first name the tie-breaker, so this agrees
# with the itemgetter('lname', 'fname') ordering stored under the same name.
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'], r['fname']))
In [8]:
# min() and max() take the same ``key`` callable as sorted(): pick the rows
# holding the lowest and the highest uid.
mi = min(rows, key=itemgetter('uid'))
ma = max(rows, key=itemgetter('uid'))
print(mi, '||\n', ma)
In [13]:
class User:
    """Small record type that stores a single ``user_id`` attribute."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        # A custom __repr__ matters here: it is what makes a printed list of
        # users show their ids rather than the default object representation.
        return 'User({0})'.format(self.user_id)
# Build a few users and show them before and after ordering by user_id.
users = [User(uid) for uid in (23, 3, 88)]
print(users)
print(sorted(users, key=lambda user: user.user_id))
In [12]:
from operator import attrgetter

# attrgetter('user_id') is the attribute-based counterpart of the lambda key;
# the sorted list is the cell's displayed value.
sorted(users, key=attrgetter('user_id'))
Out[12]:
In [ ]:
# attrgetter accepts several attribute names and then produces a tuple key
# (here: last name first, first name as tie-breaker).
# NOTE(review): the User class defined in this notebook only sets ``user_id``;
# this line needs objects that also expose ``last_name`` and ``first_name`` —
# confirm the intended data before running it.
by_name = sorted(users, key=attrgetter('last_name', 'first_name'))
In [14]:
# Users with the smallest and the largest user_id.
a = min(users, key=attrgetter('user_id'))
b = max(users, key=attrgetter('user_id'))
print(a, '\n', b)
In [3]:
from itertools import groupby
from operator import itemgetter

rows = [
    dict(address='5412 N CLARK', date='07/01/2012'),
    dict(address='5148 N CLARK', date='07/04/2012'),
    dict(address='5800 E 58TH', date='07/02/2012'),
    dict(address='2122 N CLARK', date='07/03/2012'),
    dict(address='5645 N RAVENSWOOD', date='07/02/2012'),
    dict(address='1060 W ADDISON', date='07/02/2012'),
    dict(address='4801 N BROADWAY', date='07/01/2012'),
    dict(address='1039 W GRANVILLE', date='07/04/2012'),
]

# To iterate over the records in per-date chunks, sort by the desired field
# first: itertools.groupby() only groups consecutive items with equal keys.
rows.sort(key=itemgetter('date'))

# Walk the groups: one header line per date, then that date's records.
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for entry in items:
        print(' ', entry)
In [6]:
from collections import defaultdict

# Index the rows by date so that the records of any given day can be
# reached directly through the mapping.
rows_by_date = defaultdict(list)
for record in rows:
    rows_by_date[record['date']].append(record)

# Print every record stored under one specific date.
for r in rows_by_date['07/01/2012']:
    print(r)
In [7]:
# way 1: a list comprehension that keeps only the positive values
mlist = [1, 4, -5, 10, 2, -7, 10, 3, 2, -1]
[value for value in mlist if value > 0]
Out[7]:
In [8]:
# Same comprehension with the test flipped: keep only the negative values.
[value for value in mlist if value < 0]
Out[8]:
In [10]:
# A generator expression produces the positive values one at a time instead
# of building the whole list up front.
pos = (value for value in mlist if value > 0)
pos
# Consuming the generator prints each positive value on its own line.
for x in pos:
    print(x)
In [11]:
values = ['1', '2', '-4', '-', 4, 'N/A', 5]


def is_int(val):
    """Return True when ``int(val)`` succeeds, False otherwise.

    Used as a filter() predicate for data whose selection rule cannot be
    written as a simple comprehension condition. Besides the ValueError raised
    for malformed strings, TypeError (e.g. ``None`` or a list) and
    OverflowError (float infinity) also count as "not an int", so a single odd
    value cannot abort the whole filtering pass.
    """
    try:
        int(val)
    except (TypeError, ValueError, OverflowError):
        return False
    return True


# filter() returns a lazy iterator, so list() is needed to obtain a list.
ivals = list(filter(is_int, values))
In [13]:
print(ivals)
# filter() creates an iterator, so to end up with a list the result has to be
# converted with list(), as done where ivals is built.
In [14]:
import math

mylist = [1, 4, 2, -5, 4, 7, 9, 2, 3, -1]
# Transform while filtering: square roots of the positive entries only.
[math.sqrt(value) for value in mylist if value > 0]
Out[14]:
In [15]:
# Replace every non-positive entry with 0 instead of dropping it, so the
# result keeps the length of mylist.
clip = [0 if n <= 0 else n for n in mylist]
In [16]:
# Display the list in which non-positive values were replaced by 0.
clip
Out[16]:
In [18]:
# Mirror image of the positive clip: keep the negative entries and replace
# everything else with 0.
slip = [0 if n >= 0 else n for n in mylist]
In [19]:
# Display the list in which non-negative values were replaced by 0.
slip
Out[19]:
In [28]:
from collections import defaultdict
from itertools import compress

# Filtering one sequence by means of a second, related sequence.
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    # The comma after the next entry is essential: without it Python joins the
    # two adjacent string literals into a single address and the list ends up
    # one element shorter than ``counts``.
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [0, 3, 10, 4, 1, 7, 6, 1]

# Goal: output every address whose corresponding count is greater than 5.
# Sanity check that the two sequences line up element for element.
if len(addresses) == len(counts):
    print('addresses == counts')

# Boolean selector with one flag per count, in the form itertools.compress()
# expects.
more5 = [n > 5 for n in counts]

# Map each count value to its flag; repeated counts collapse into one key.
cm = dict(zip(counts, more5))
In [29]:
# Display the mapping from each count value to its "greater than 5" flag.
cm
Out[29]:
In [30]:
# compress() yields the addresses whose matching flag in more5 is true; it
# returns an iterator, hence the list() call to display the result.
list(compress(addresses,more5))
Out[30]: