字符串操作

去空格及特殊符号


In [2]:
s = ' hello, world!'
# strip() without an argument trims whitespace from both ends.
# lstrip()/rstrip() treat their argument as a *set of characters* to drop
# from that end of the string, not as a literal prefix or suffix.
print(s.strip(), s.lstrip(' hello, '), s.rstrip('!'), sep='\n')


hello, world!
world!
 hello, world

连接字符串


In [3]:
sStr1, sStr2 = 'strcat', 'append'
# Strings are immutable, so concatenation rebinds sStr1 to a brand-new string.
sStr1 = sStr1 + sStr2
print(sStr1)


strcatappend

查找字符


In [7]:
# index() raises ValueError when the substring is not found; use find() to get -1 instead
sStr1 = 'strchr'
sStr2 = 'r'
nPos = sStr1.index(sStr2)
print(nPos)
print(sStr1.index('o'))


2
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-083cc2706035> in <module>()
      4 nPos = sStr1.index(sStr2)
      5 print(nPos)
----> 6 print(sStr1.index('o'))

ValueError: substring not found

比较字符串


In [9]:
# Python 3 has no built-in cmp(); compare with the operator module instead (the final cmp() call demonstrates the NameError)
import operator
a = 'strchr'
b = 'strch'
print(operator.lt(a,b))
print(operator.le(a,b))
print(operator.eq(a,b))
print(operator.ne(a,b))
print(operator.gt(a,b))
print(operator.ge(a,b))
print(operator.lt(a,b))
print(cmp(sStr2,sStr1))


False
False
False
True
True
True
False
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-9-6397f9d8f9b2> in <module>()
     10 print(operator.ge(a,b))
     11 print(operator.lt(a,b))
---> 12 print(cmp(sStr2,sStr1))

NameError: name 'cmp' is not defined

字符串中的大小写转换


In [11]:
# upper()/lower() return new strings; the source string is never modified in place.
sStr1 = 'JCstrlwr'.upper()
sStr2 = sStr1.lower()
print(sStr1, sStr2, sep='\n')


JCSTRLWR
jcstrlwr

切片


In [14]:
# Strings are immutable: slicing always builds a new string.
s1 = 'hello,world!'
s2 = s1[:7:2]           # indices 0..6 with a step of 2
s3 = s1[slice(1, 3)]    # equivalent to s1[1:3]
print(s2, s3, sep='\n')


hlow
el

翻转字符串


In [15]:
sStr1 = 'abcdefg'
# reversed() walks the string back to front; join() glues the characters together again.
sStr1 = ''.join(reversed(sStr1))
print(sStr1)


gfedcba

查找字符和字符串


In [16]:
# find() locates single characters and longer substrings alike and returns -1
# when nothing matches, whereas index() raises ValueError in that case.
sStr1, sStr2 = 'abcdefg', 'cde'
print(sStr1.find(sStr2), sStr1.find('f'), sep='\n')


2
5

分割字符串


In [18]:
sStr1 = 'ab,cde,fgh,ijk'
sStr2 = ','
# Drop everything up to and including the first separator. split(sep, 1)[-1]
# works for separators of any length and leaves the string untouched when the
# separator does not occur (find(...) + 1 was only correct for 1-char separators).
sStr1 = sStr1.split(sStr2, 1)[-1]
print(sStr1)
# split() without a limit cuts the string at every separator.
s = 'ab,cde,fgh,ijk'
print(s.split(','))


cde,fgh,ijk
['ab', 'cde', 'fgh', 'ijk']

计算字符串中出现频次最多的字符


In [40]:
from collections import Counter
s = 'ABaacbdbccdeeffg123klmn'
# Keep letters only, folded to lower case; note that s is a list from here on.
s = list(filter(str.isalpha, s.lower()))
print(s)
count = Counter(s)
print(count)
print(type(count))
# Counter is a dict subclass, so the usual dict views are available.
print(count.keys(), count.values(), count.items(), sep='\n')
# The highest frequency, then every letter that reaches it.
m = max(count.values())
print(m)
result_list = [letter for letter in count if count[letter] == m]
print(result_list)
print(sorted(result_list))


['a', 'b', 'a', 'a', 'c', 'b', 'd', 'b', 'c', 'c', 'd', 'e', 'e', 'f', 'f', 'g', 'k', 'l', 'm', 'n']
Counter({'a': 3, 'b': 3, 'c': 3, 'd': 2, 'e': 2, 'f': 2, 'g': 1, 'k': 1, 'l': 1, 'm': 1, 'n': 1})
<class 'collections.Counter'>
dict_keys(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'k', 'l', 'm', 'n'])
dict_values([3, 3, 3, 2, 2, 2, 1, 1, 1, 1, 1])
dict_items([('a', 3), ('b', 3), ('c', 3), ('d', 2), ('e', 2), ('f', 2), ('g', 1), ('k', 1), ('l', 1), ('m', 1), ('n', 1)])
3
['a', 'b', 'c']
['a', 'b', 'c']

In [41]:
#version 1
import re
from collections import Counter

def get_max_value_v1(text):
    """Return the most frequent ASCII letters of ``text``, ignoring case.

    The extracted letters are printed before counting (kept from the original
    walkthrough so the intermediate list stays visible).

    Args:
        text: The string to analyse.

    Returns:
        An alphabetically sorted list with every letter that reaches the
        highest frequency; an empty list when ``text`` contains no ASCII letter.
    """
    # After lower() only a-z can match, so the character class needs no A-Z.
    letters = re.findall('[a-z]', text.lower())
    print(letters)
    if not letters:
        # max() raises ValueError on an empty sequence; no letters means no winners.
        return []
    count = Counter(letters)
    max_value = max(count.values())
    return sorted(k for k, v in count.items() if v == max_value)
print(get_max_value_v1('ABaacbdbccdeeffg123klmn'))


['a', 'b', 'a', 'a', 'c', 'b', 'd', 'b', 'c', 'c', 'd', 'e', 'e', 'f', 'f', 'g', 'k', 'l', 'm', 'n']
['a', 'b', 'c']

In [20]:
#version 2
from collections import Counter

def get_max_value(text):
    """Return the most frequent letter of ``text``, ignoring case.

    Only characters for which ``str.isalpha()`` is true are counted. When
    several letters share the highest frequency, the smallest one wins.

    Args:
        text: The string to analyse.

    Returns:
        The winning letter in lower case, or ``''`` when ``text`` contains no
        letters at all.
    """
    count = Counter(ch for ch in text.lower() if ch.isalpha())
    if not count:
        # max() raises ValueError on an empty sequence; report "no letter" instead.
        return ''
    m = max(count.values())
    # min() over the tied letters equals sorted(...)[0] without building the list.
    return min(ch for ch, n in count.items() if n == m)

In [42]:
#version 3
import string

def get_max_value(text):
    """Return the most frequent ASCII letter of ``text``, ignoring case.

    Args:
        text: The string to analyse.

    Returns:
        The winning lower-case letter (the alphabetically first one on a tie),
        or ``''`` when ``text`` contains no ASCII letter.
    """
    text = text.lower()
    # max() keeps the first maximal item and ascii_lowercase is in alphabetical
    # order, so ties resolve to the alphabetically first letter.
    best = max(string.ascii_lowercase, key=text.count)
    # Without any letter every count is 0 and max() would wrongly report 'a'.
    return best if text.count(best) else ''
print(get_max_value('ABaacbdbccdeeffg123klmn'))


a

In [50]:
"""
max(iterable, key, default) 求迭代器的最大值,其中iterable 为迭代器,
max会for i in … 遍历一遍这个迭代器,然后将迭代器的每一个返回值当做参数传给key=func 中的func(一般用lambda表达式定义) ,
然后将func的执行结果传给key,然后以key为标准进行大小的判断。
"""
max(range(5),key = lambda x : x>3)


Out[50]:
4

In [51]:
max(range(6), key = lambda x : x>2)
# >>> 3
# The key maps the items to [False, False, False, True, True, True].
# True compares greater than False, and among several maximal keys max() returns the first item, hence 3.

max([3,5,2,1,4,3,0], key = lambda x : x)
# >>> 5
# The key returns each item unchanged, so the plain maximum 5 is returned.

max('ah', 'bf', key=lambda x: x[1])
# >>> 'ah'
# The key is each string's second character; 'h' in 'ah' is greater than 'f' in 'bf', so 'ah' is returned.

max('ah', 'bf', key=lambda x: x[0])
# >>> 'bf'
# The key is each string's first character; 'b' in 'bf' is greater than 'a' in 'ah', so 'bf' is returned.

text = 'Hello World'
max('abcdefghijklmnopqrstuvwxyz', key=text.count)
# >>> 'l'
# The key is how often each letter occurs in 'Hello World' (case-sensitive); 'l' occurs most often, so 'l' is returned.


Out[51]:
'l'

计算字符串中字符出现的最大次数


In [52]:
# Occurrence count of every character once the text is lower-cased
# (the two spaces are counted like any other character):
# t  h  e     m  i  s  s  i  s  s  i  p  p  i     r  i  v  e  r
# 1  1  2  2  1  5  4  4  5  4  4  5  2  2  5  2  2  5  1  2  2
s = 'The Mississippi River'.lower()
map_result = [s.count(ch) for ch in s]
print(map_result)
print(max(map_result))


[1, 1, 2, 2, 1, 5, 4, 4, 5, 4, 4, 5, 2, 2, 5, 2, 2, 5, 1, 2, 2]
5

In [ ]: