In [1]:
import re
In [2]:
# Sample DuckDuckGo redirect link: the real destination is carried,
# percent-encoded, in the `uddg` query parameter.
url = (
    'https://duckduckgo.com/l/?kh=-1'
    '&uddg=https%3A%2F%2Fdocs.python.org%2F3.3%2Flibrary%2Fre.html'
)
In [3]:
# Strip the DuckDuckGo redirect prefix, leaving only the (still
# percent-encoded) target URL.
# The dots are escaped so they match literally, and `[?&]` accepts `uddg`
# whether or not it is the first query parameter.
prefix_pattern = re.compile(r'^https://duckduckgo\.com/.*?[?&]uddg=')
# `count` is given by keyword; passing it positionally to re.sub is
# deprecated since Python 3.13.
url = prefix_pattern.sub('', url, count=1)
url
Out[3]:
In [4]:
# Exploratory: split the URL around every percent-escape. The capture group
# keeps the two hex digits of each escape in the resulting list.
# Hex digits in percent-escapes are case-insensitive, so `%3a` must match
# as well as `%3A`.
escape_pattern = re.compile(r'%([0-9A-Fa-f]{2})')
escape_pattern.split(url)
Out[4]:
In [5]:
from urllib.parse import unquote
In [6]:
# Decode all %xx escapes in one standard-library call, replacing the manual
# regex split tried in the earlier cell.
unquote(url)
Out[6]:
In [7]:
#!/usr/bin/env python3
import sys
import re
from urllib.parse import unquote
def main(lines):
    """Print the decoded target URL for each DuckDuckGo redirect link.

    Args:
        lines: Iterable of strings, one link per item (for example a list,
            an open text file, or ``sys.stdin``). A trailing line
            terminator on an item is ignored.

    For each item, the leading ``https://duckduckgo.com/...uddg=`` redirect
    prefix is removed if present, the remainder is percent-decoded, and the
    result is printed on its own line. Items without the prefix are only
    percent-decoded.
    """
    # Dots are escaped so they match literally; `[?&]` also accepts `uddg`
    # when it is the first query parameter.
    prefix_pattern = re.compile(r'^https://duckduckgo\.com/.*?[?&]uddg=')
    for url in lines:
        # Lines read from a file keep their newline; drop it so print()
        # does not emit a blank line after every URL.
        url = url.rstrip('\r\n')
        # `count` by keyword: positional use with re.sub is deprecated
        # since Python 3.13.
        url = prefix_pattern.sub('', url, count=1)
        print(unquote(url))
if __name__ == '__main__':
    # Demo run on a single hard-coded link. To process links piped in on
    # standard input instead, use:
    # main(sys.stdin)
    main([
        'https://duckduckgo.com/l/?kh=-1&uddg=https%3A%2F%2Fdocs.python.org%2F3.3%2Flibrary%2Fre.html',
    ])