In [1]:
import pprint
import re
In [2]:
def get_links_from_md_regex(file_path, p=re.compile(r'\[(.+?)\]\((.+?)\)'), encoding='utf-8'):
    """Collect inline Markdown links ``[text](url)`` from a file, line by line.

    Args:
        file_path: Path of the Markdown file to scan.
        p: Compiled pattern applied to each line with ``findall``. It is
            expected to have at least two capturing groups; the first is
            reported as the link text and the second as the URL.
        encoding: Text encoding used to read the file. Defaults to UTF-8 so
            non-ASCII link text/URLs decode the same way on every platform;
            pass ``None`` to fall back to the platform default.

    Returns:
        A list of ``[file_path, line_number, text, url]`` lists, in file
        order, where ``line_number`` is 1-based. Empty if nothing matches.
    """
    with open(file_path, encoding=encoding) as f:
        return [
            # Index the groups instead of unpacking so patterns with extra
            # groups (e.g. an optional title) still work.
            [file_path, line_no, groups[0], groups[1]]
            for line_no, line in enumerate(f, start=1)
            for groups in p.findall(line)
        ]
In [3]:
# Run the extractor on a sample Markdown file (path is relative to the
# working directory) and pretty-print the [path, line number, text, URL] rows.
pprint.pprint(get_links_from_md_regex('data/src/md/test1.md'))
In [4]:
# Test input: a single link whose URL itself contains parentheses.
s = '[text](URL_with())'
In [5]:
# p1: both groups are lazy, so the URL group stops at the first ')'.
# On `s` the URL is therefore cut short: [('text', 'URL_with(')].
p1 = re.compile(r'\[(.+?)\]\((.+?)\)')
print(p1.findall(s))
In [6]:
# p2: the URL group is greedy, so it extends to the last ')' on the line.
# On `s` this captures the full URL: [('text', 'URL_with()')].
p2 = re.compile(r'\[(.+?)\]\((.+)\)')
print(p2.findall(s))
In [7]:
# Test input: two links on one line, the first with parentheses in its URL.
s_inline = '[text](URL_with()) and [text2](URL2)'
In [8]:
# Greedy p2 over-matches when a line holds several links: everything up to
# the final ')' becomes one URL: [('text', 'URL_with()) and [text2](URL2')].
print(p2.findall(s_inline))
In [9]:
# p3: the URL group is limited to an explicit character class (ASCII letters,
# digits and the listed punctuation, including '(' and ')'). Whitespace and
# square brackets are not in the class, so a match cannot run into a
# following link. On `s`: [('text', 'URL_with()')].
p3 = re.compile(r"\[(.+?)\]\(([a-zA-Z0-9-._~:/?#@!$&'()*+,;=%]+)\)")
print(p3.findall(s))
In [10]:
# p3 separates both links correctly:
# [('text', 'URL_with()'), ('text2', 'URL2')].
print(p3.findall(s_inline))
In [11]:
# Test input: a link whose URL contains non-ASCII (Japanese) characters.
s_jp = '[text](日本語URL)'
In [12]:
# '.' accepts any character, so lazy p1 handles the Japanese URL:
# [('text', '日本語URL')].
print(p1.findall(s_jp))
In [13]:
# Greedy p2 gives the same result here (only one ')' on the line):
# [('text', '日本語URL')].
print(p2.findall(s_jp))
In [14]:
# p3's character class is ASCII-only, so the Japanese URL is rejected: [].
print(p3.findall(s_jp))
In [15]:
# p4: p3's character class plus \w. For str patterns in Python 3, \w also
# matches Unicode word characters, so the Japanese URL is accepted:
# [('text', '日本語URL')].
p4 = re.compile(r"\[(.+?)\]\(([a-zA-Z0-9-._~:/?#@!$&'()*+,;=%\w]+)\)")
print(p4.findall(s_jp))
In [16]:
# Test input: a Japanese-URL link followed on the same line by ordinary
# parenthesised text that is not part of any link.
s_jp_inline = '[text](日本語URL)と括弧(xxx)。'
In [17]:
# Lazy p1 stops at the first ')', which is the right place here:
# [('text', '日本語URL')].
print(p1.findall(s_jp_inline))
In [18]:
# Greedy p2 swallows the trailing parenthesised text:
# [('text', '日本語URL)と括弧(xxx')].
print(p2.findall(s_jp_inline))
In [19]:
# ASCII-only p3 still cannot match the Japanese URL: [].
print(p3.findall(s_jp_inline))
In [20]:
# Caveat: p4 over-matches here. The Japanese characters are \w and the
# parentheses are in the class, so the URL group runs on to the last ')':
# [('text', '日本語URL)と括弧(xxx')].
print(p4.findall(s_jp_inline))
In [21]:
# p_title: p4's URL class followed by an optional ` "title"` suffix.
# This adds two groups: group 3 is the whole suffix (leading space and
# quotes included) and group 4 is the title text alone.
p_title = re.compile(r'\[(.+?)\]\(([a-zA-Z0-9-._~:/?#@!$&\'()*+,;=%\w]+)( "(.+)")?\)')
In [22]:
# With a title present all four groups are filled:
# [('text', 'URL', ' "title"', 'title')].
print(p_title.findall('[text](URL "title")'))
In [23]:
# Without a title the optional groups come back as empty strings:
# [('text', 'URL', '', '')].
print(p_title.findall('[text](URL)'))