notebook.community

Edit and run



In [1]:

    
import string

문자열 상수



In [5]:

    
string.ascii_letters









    Out[5]:





'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'



In [3]:

    
string.ascii_lowercase









    Out[3]:





'abcdefghijklmnopqrstuvwxyz'



In [4]:

    
string.ascii_uppercase









    Out[4]:





'ABCDEFGHIJKLMNOPQRSTUVWXYZ'



In [6]:

    
string.digits









    Out[6]:





'0123456789'



In [8]:

    
digits = ''
for n in range(10):
    digits += str(n)
    
print(digits)

0123456789



In [12]:

    
print('{}, {}'.format('hello', 'world'))









    



hello, world



In [11]:

    
print(u'{}, {}'.format(u'안녕', u'세계'))









    



안녕, 세계



In [13]:

    
print('{:,}'.format(1000000000000000))









    



1,000,000,000,000,000



In [20]:

    
n = 1000000000000000
# 세 번마다 쉼표 삽입
n_comma = ''
for i, n_str in enumerate(str(n)[::-1]):
    #print(i, n_str)
    n_comma += str(n_str)
    if i%3 == 0:
        n_comma += ','
print(n_comma[::-1])









    



,100,000,000,000,000,0

Template



In [21]:

    
from string import Template



In [24]:

    
template = Template('$who likes $what')
template.substitute(who='SJ', what='Python')









    Out[24]:





'SJ likes Python'

문자열 분해 --> list



In [25]:

    
text = 'Hello, World!'



In [26]:

    
text.split()









    Out[26]:





['Hello,', 'world!']



In [27]:

    
text.split(',')









    Out[27]:





['Hello', ' world!']



In [29]:

    
string.capwords('HelloWorld')









    Out[29]:





'Helloworld'

문자열 검색



In [30]:

    
text = 'big python, python for big data'



In [31]:

    
string.find(text, 'python')









    Out[31]:





4



In [38]:

    
string.find(text, 'python', 10)









    Out[38]:





12



In [33]:

    
text[4]









    Out[33]:





'p'



In [34]:

    
string.rfind(text, 'python')









    Out[34]:





12



In [35]:

    
text[12]









    Out[35]:





'p'



In [37]:

    
if string.find(text, 'python') == string.rfind(text, 'python'):
    print('유일한 단어')
else:
    print('해당 단어가 두 개 이상')









    



해당 단어가 두 개 이상

연습문제

다음과 같은 문자열이 저장된 변수에서 python 또는 Python이라는 문자열의 시작 인덱스를 모두 포함한 리스트를 반환하시오.

text = 'big python, python for big data. Python is the best!'

text_kr = '빅 파이썬, 빅데이터를 위한 파이썬. 파이썬이 최고'



In [63]:

    
text = 'big python, python for big data. Python is the best!'
text_kr = u'빅 파이썬, 빅데이터를 위한 파이썬. 파이썬이 최고'



In [39]:

    
string.lower('HELLO')









    Out[39]:





'hello'



In [46]:

    
string.find(text, 'javascript')









    Out[46]:





-1



In [65]:

    
string.find(text_kr, u'파이썬')









    Out[65]:





2



In [49]:

    
text = 'big python, python for big data. Python is the best!'
string.find(text, 'python', 18)









    Out[49]:





-1



In [53]:

    
text = 'big python, python for big data. Python is the best!'

text_lower = string.lower(text)

start_index_list = []
start_idx = 0
while start_idx < len(text_lower):
    idx = string.find(text_lower, 'python', start_idx)
    if idx == -1:
        break
    start_index_list.append(idx)
    start_idx = idx+len('python')
    
print(start_index_list)









    



[4, 12, 33]



In [55]:

    
text = 'big python, python for big data. Python is the best!'
text.lower().count('python')









    Out[55]:





3

연습문제

python이라는 단어만 제외하고 문장을 다시 구성.

text = 'big python, python for big data. Python is the best!'

-->'big , for big data. is the best!'



In [56]:

    
string.replace(text, 'python', 'javascript')









    Out[56]:





'big javascript, javascript for big data. Python is the best!'



In [58]:

    
string.replace(text, 'python', '')









    Out[58]:





'big ,  for big data. Python is the best!'



In [59]:

    
string.replace(text, 'python', '', 1)









    Out[59]:





'big , python for big data. Python is the best!'

정규 표현식 (regular expression)

해결하고자 하는 문제: 텍스트에서 특정한 패턴의 탐색, 추출



In [109]:

    
text = """이성주의 전화번호는 010-1234-5678입니다. 
이메일은 seongjoo@codebasic.co입니다. 김성주의 전화번호는 010-4321-8765입니다."""



In [113]:

    
import re
phonePattern = re.compile(r'\d{3}-\d{4}-\d{4}')
phonePattern.findall(text)









    Out[113]:





['010-1234-5678', '010-4321-8765']



In [ ]: