In [ ]:
"""
格式化文件的大小
"""
# Unit suffixes keyed by the multiple they step in:
# 1000 -> decimal (SI) units, 1024 -> binary (IEC) units.
SUFFIXES = {
    1000: ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'],
    1024: ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'],
}


def approximate_size(size, aKiloByteIs1024Bytes=True):
    '''Convert a file size to human-readable form.

    Keyword arguments:
    size -- file size in bytes
    aKiloByteIs1024Bytes -- if True (default), use multiples of 1024
                            if False, use multiples of 1000

    Returns: string, the scaled size with one decimal place followed by
    the unit suffix (e.g. '931.3 GiB').

    Raises: ValueError if size is negative, or so large that it does not
    fit under the biggest suffix.

    Note: the size is divided once before the first comparison, so values
    smaller than one multiple are reported as a fraction of the first unit
    (512 -> '0.5 KiB').
    '''
    if size < 0:
        raise ValueError('number must be non-negative')

    multiple = 1024 if aKiloByteIs1024Bytes else 1000
    for suffix in SUFFIXES[multiple]:
        # Step up one unit per iteration until the value fits under
        # the next unit's threshold.
        size /= multiple
        if size < multiple:
            return '{0:.1f} {1}'.format(size, suffix)

    # Every suffix was tried and the value is still too big.
    raise ValueError('number too large')
if __name__ == '__main__':
    # Demo: render the same byte count in decimal units first, then in
    # binary units, and finally show the function's own documentation.
    for use_binary_units in (False, True):
        print(approximate_size(1000000000000, use_binary_units))
    print(approximate_size.__doc__)
In [7]:
import sys
sys.path
try:
from lxml import etree
except ImportError:
import xml.etree.ElementTree as etree
Out[7]:
In [9]:
# "/" is true division on Python 3, so this evaluates to 5.5 rather than 5.
11/2
Out[9]:
In [10]:
# Multi-line sample text; the triple-quoted literal keeps the embedded line breaks.
s = '''Finished files are the re‐
sult of years of scientif‐
ic study combined with the
experience of years.'''
# Split the text at its line breaks into a list of lines (line endings removed).
s.splitlines()
Out[10]:
In [12]:
# Lower-case the text first so the count of 'f' is case-insensitive
# (the capital 'F' of 'Finished' is included). Uses `s` from the previous cell.
s.lower().count('f')
Out[12]:
In [17]:
a_string = 'My alphabet starts where your alphabet ends.'
# Indexes 3 up to (not including) 11: 'alphabet'
a_string[3:11]
# A negative end index counts from the end: everything from index 3 except the last 3 characters
a_string[3:-3]
# Omitting the end index slices through to the end of the string: ' where your alphabet ends.'
a_string[18:]
Out[17]:
In [19]:
# Bytes literal; the escape \x65 is the byte for 'e', so the value is b'abcde'.
by = b'abcd\x65'
by
Out[19]:
In [20]:
# Inspect the type of the bytes literal assigned to `by` above.
type(by)
Out[20]:
In [22]:
# Append one more byte (0xff); bytes objects are immutable, so += rebinds `by`
# to a new, longer bytes object.
by += b'\xff'
by
Out[22]:
In [23]:
# Number of bytes in `by` (6 if the earlier cells ran in order: b'abcde' plus b'\xff').
len(by)
Out[23]:
In [24]:
# Indexing a bytes object yields an int on Python 3 (97, the value of b'a',
# assuming `by` still starts with b'abcd').
by[0]
Out[24]:
In [35]:
def is_same_dic(dict1, dict2):
    '''Return whether two dictionaries hold the same keys with equal values.

    Keyword arguments:
    dict1 -- first dictionary
    dict2 -- second dictionary

    Returns: bool -- True when both dictionaries have the same number of
    entries and every key of dict1 is present in dict2 with a value that
    does not compare unequal (!=); False otherwise.
    '''
    # Different sizes can never match. Checking this first also means
    # dict2 cannot hold extra keys once every key of dict1 is found in it.
    if len(dict1) != len(dict2):
        return False

    for key, value in dict1.items():
        if key not in dict2 or dict2[key] != value:
            return False
    return True
# Sample records for is_same_dic: d2 is d1 plus one extra key ('dd'),
# so the two dictionaries differ in size.
d1 = {'_id': '5c6519622a2817410f9b76fd', 'name': 'Python开发', 'ISBN': '978-7-5159-0954-6', 'jzc': '123456', 'author': '大侠00', 'notes': '这是备注信息', 'press': '机械工业', 'price': '85.0', 'category': 'IT'}
d2 = {'_id': '5c6519622a2817410f9b76fd', 'name': 'Python开发', 'ISBN': '978-7-5159-0954-6', 'jzc': '123456', 'author': '大侠00', 'notes': '这是备注信息', 'press': '机械工业', 'price': '85.0', 'category': 'IT','dd':'dd'}
print(d1)
# Prints False: is_same_dic rejects dictionaries whose lengths differ.
print(is_same_dic(d2, d1))
In [50]:
# Regular expressions
s = 'ROAD100 ROADNORTH BROAD MAIN ROAD'
# Plain str.replace substitutes every occurrence of 'ROAD', including the ones
# embedded in 'ROAD100', 'ROADNORTH' and 'BROAD'.
s.replace('ROAD', 'RD.')
import re
# $ means "end of string" (the matching "start of string" character is ^).
# Only the 'ROAD' at the very end of the string is matched; the 'ROAD' inside 'BROAD' is not.
re.sub('ROAD$','RD.', s)
Out[50]:
In [52]:
import re
s = '100 BROAD ROAD APT. 3'
# \b matches a word boundary, so r'\bROAD\b' matches 'ROAD' only as a standalone word,
# wherever it occurs in the string (start, middle or end), and leaves 'BROAD' untouched.
re.sub(r'\bROAD\b', 'RD.', s)
# '100 BROAD RD. APT. 3'
Out[52]:
In [59]:
import re

# Earlier iterations of the phone-number pattern, kept for reference.
#
# 1. Exact ddd-ddd-dddd, anchored at both ends:
#      phonePattern = re.compile(r'^(\d{3})‐(\d{3})‐(\d{4})$')
#      phonePattern.search('800‐555‐1212').groups()
#      # ('800', '555', '1212')
#
# 2. Same, with a mandatory extension:
#      phonePattern = re.compile(r'^(\d{3})‐(\d{3})‐(\d{4})‐(\d+)$')
#      phonePattern.search('800‐555‐1212‐1234').groups()
#      # ('800', '555', '1212', '1234')
#
# 3. Any separator instead of a literal hyphen:
#      phonePattern = re.compile(r'^(\d{3})\D+(\d{3})\D+(\d{4})\D+(\d+)$')
#    Groups of digits as before; \D matches any character that is not a
#    digit and + means "one or more", so \D+ matches one or more non-digit
#    characters and stands in for the literal '-'.
#
# 4. Verbose form of the pattern that is compiled below:
#      phonePattern = re.compile(r'''
#                      # don't match beginning of string,
#                      # number can start anywhere
#          (\d{3})     # area code is 3 digits (e.g. '800')
#          \D*         # optional separator is any number of non-digits
#          (\d{3})     # trunk is 3 digits (e.g. '555')
#          \D*         # optional separator
#          (\d{4})     # rest of number is 4 digits (e.g. '1212')
#          \D*         # optional separator
#          (\d*)       # extension is optional and can be any number of digits
#          $           # end of string
#          ''', re.VERBOSE)

# Current pattern: not anchored at the start, so leading characters such as a
# country code are skipped; separators (\D*) and the extension (\d*) are optional.
phonePattern = re.compile(r'(\d{3})\D*(\d{3})\D*(\d{4})\D*(\d*)$')
# Yields ('800', '555', '1212', ''): the leading '1' is skipped and the
# extension group is empty.
phonePattern.search('1‐800‐555‐1212').groups()
• ^ 匹配字符串开始位置。
• $ 匹配字符串结束位置。
• \b 匹配一个单词边界。
• \d 匹配一个数字。
• \D 匹配一个任意的非数字字符。
• x? 匹配可选的 x 字符。换句话说,就是 0 个或者 1 个 x 字符。
• x*匹配0个或更多的x。
• x+匹配1个或者更多x。
• x{n,m}匹配n到m个x,至少n个,不能超过m个。
• (a|b|c) 匹配单独的任意一个 a 或者 b 或者 c。
• (x) 这是一个组,它会记忆它匹配到的字符串。你可以用 re.search 返回的匹配对象的 groups() 函数来获取到匹配的值。
Out[59]:
In [ ]: