Commit 42c4dfe2 authored by Dan Pascu

Fixed http URL regular expression

parent 635ead6e
...@@ -1500,7 +1500,7 @@ class HtmlProcessor(object): ...@@ -1500,7 +1500,7 @@ class HtmlProcessor(object):
re.compile(r""" re.compile(r"""
(?P<body> (?P<body>
https?://(?:[^:@]+(?::[^@]*)?@)?(?P<host>[a-z0-9.-]+)(?::\d*)? # scheme :// [ user [ : password ] @ ] host [ : port ] https?://(?:[^:@]+(?::[^@]*)?@)?(?P<host>[a-z0-9.-]+)(?::\d*)? # scheme :// [ user [ : password ] @ ] host [ : port ]
(?:/(?:[\w/%!$@*&='~:;,.+-]*(?:\([\w/%!$@#*&='~:;,.+-]*\))?)*)? # [ / path] (?:/(?:[\w/%!$@#*&='~:;,.+-]*(?:\([\w/%!$@#*&='~:;,.+-]*\))?)*)? # [ / path]
(?:\?(?:[\w%!$@*&='~:;,.+-]*(?:\([\w%!$@*&='~:;,.+-]*\))?)*)? # [ ? query] (?:\?(?:[\w%!$@*&='~:;,.+-]*(?:\([\w%!$@*&='~:;,.+-]*\))?)*)? # [ ? query]
) )
""", re.I|re.U|re.X), """, re.I|re.U|re.X),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment