notebook.community

Edit and run



In [1]:

    
import pprint
import re



In [2]:

    
def get_links_from_md_regex(file_path, p=re.compile(r'\[(.+?)\]\((.+?)\)')):
    """Collect inline Markdown links (``[text](url)``) from a file.

    The file is scanned one line at a time, so a link that is split across
    several lines is not found.

    Args:
        file_path: Path of the Markdown file to read. The file is decoded
            as UTF-8.
        p: Compiled pattern used to find links. It is expected to have at
            least two capturing groups: the first is taken as the link
            text, the second as the URL.

    Returns:
        A list with one ``[file_path, line_number, text, url]`` entry per
        link, in order of appearance. ``line_number`` is 1-based.
    """
    links = []
    # Decode as UTF-8 explicitly: without it, open() uses the platform's
    # locale encoding, which breaks on non-ASCII link text/URLs on systems
    # whose default encoding is not UTF-8.
    with open(file_path, encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            links.extend(
                [file_path, line_no, found[0], found[1]]
                for found in p.findall(line)
            )
    return links



In [3]:

    
# Extract the links from a sample file. Each entry in the result is
# [file path, 1-based line number, link text, URL].
pprint.pprint(get_links_from_md_regex('data/src/md/test1.md'))









    



[['data/src/md/test1.md', 1, 'Instagram', 'https://www.instagram.com/'],
 ['data/src/md/test1.md', 1, 'Twitter', 'https://twitter.com'],
 ['data/src/md/test1.md', 3, '[Py] Python.org', 'https://www.python.org/'],
 ['data/src/md/test1.md', 4, 'relative link', '../test/']]



In [4]:

    
# Test input: an inline link whose URL itself ends with a pair of parentheses.
s = '[text](URL_with())'



In [5]:

    
# Baseline pattern: link text and URL are both matched lazily (.+?).
# The URL therefore stops at the first ')', so the closing parenthesis that
# belongs to the URL itself is cut off.
p1 = re.compile(r'\[(.+?)\]\((.+?)\)')
print(p1.findall(s))









    



[('text', 'URL_with(')]



In [6]:

    
# Variant with a greedy URL group (.+): it extends to the last ')' on the
# line, so parentheses inside the URL are kept.
p2 = re.compile(r'\[(.+?)\]\((.+)\)')
print(p2.findall(s))









    



[('text', 'URL_with()')]



In [7]:

    
# Test input: the same link followed by a second link on the same line.
s_inline = '[text](URL_with()) and [text2](URL2)'



In [8]:

    
# Drawback of the greedy URL group: it runs on to the last ')' of the line,
# so both links are swallowed into a single match.
print(p2.findall(s_inline))









    



[('text', 'URL_with()) and [text2](URL2')]



In [9]:

    
# Restrict the URL to an explicit set of ASCII characters (letters, digits and
# the listed punctuation, parentheses included). Spaces and square brackets
# are not in the set, so a match cannot run across into a following link.
# NOTE(review): the set looks like the RFC 3986 URI characters minus '[' and
# ']' -- confirm if strict conformance matters.
p3 = re.compile(r"\[(.+?)\]\(([a-zA-Z0-9-._~:/?#@!$&'()*+,;=%]+)\)")
print(p3.findall(s))









    



[('text', 'URL_with()')]



In [10]:

    
# With the restricted character set, the two links on the line are found
# separately.
print(p3.findall(s_inline))









    



[('text', 'URL_with()'), ('text2', 'URL2')]



In [11]:

    
# Test input: a link whose URL contains non-ASCII (Japanese) characters.
s_jp = '[text](日本語URL)'



In [12]:

    
# p1 matches the URL with '.', which accepts any character, so the
# non-ASCII URL is captured.
print(p1.findall(s_jp))









    



[('text', '日本語URL')]



In [13]:

    
# p2 also matches the URL with '.', so the non-ASCII URL is captured here too.
print(p2.findall(s_jp))









    



[('text', '日本語URL')]



In [14]:

    
# p3 only allows ASCII characters in the URL, so this link is not matched
# and the result is an empty list.
print(p3.findall(s_jp))

[]



In [15]:

    
# Same as p3 with \w added to the URL character set. For str patterns \w
# also matches non-ASCII word characters, so the Japanese URL is accepted.
p4 = re.compile(r"\[(.+?)\]\(([a-zA-Z0-9-._~:/?#@!$&'()*+,;=%\w]+)\)")
print(p4.findall(s_jp))









    



[('text', '日本語URL')]



In [16]:

    
# Test input: a Japanese-URL link followed by ordinary text that contains its
# own parenthesised part (not a link).
s_jp_inline = '[text](日本語URL)と括弧(xxx)。'



In [17]:

    
# The lazy URL group of p1 stops at the first ')', so only the real URL is
# captured.
print(p1.findall(s_jp_inline))









    



[('text', '日本語URL')]



In [18]:

    
# The greedy URL group of p2 runs on to the last ')' and drags the trailing
# text into the captured URL.
print(p2.findall(s_jp_inline))









    



[('text', '日本語URL)と括弧(xxx')]



In [19]:

    
# p3 finds nothing: the URL contains characters outside its ASCII-only set.
print(p3.findall(s_jp_inline))

[]



In [20]:

    
# p4 over-matches like p2: its set allows both \w characters and parentheses,
# so the greedy URL group extends through the trailing text to the last ')'.
print(p4.findall(s_jp_inline))









    



[('text', '日本語URL)と括弧(xxx')]



In [21]:

    
# Extend the p4 pattern with an optional title: a space followed by a
# double-quoted string before the closing ')'. The optional part is itself a
# capturing group with a nested group for the text inside the quotes, so
# findall returns 4-tuples. The quote inside the character set is written as
# \' because the pattern literal is single-quoted.
p_title = re.compile(r'\[(.+?)\]\(([a-zA-Z0-9-._~:/?#@!$&\'()*+,;=%\w]+)( "(.+)")?\)')



In [22]:

    
# Link with a title: the third item is the whole optional part (leading space
# and quotes included), the fourth is the title text alone.
print(p_title.findall('[text](URL "title")'))









    



[('text', 'URL', ' "title"', 'title')]



In [23]:

    
# Link without a title: the optional groups do not take part in the match and
# come back as empty strings.
print(p_title.findall('[text](URL)'))









    



[('text', 'URL', '', '')]